4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode result.
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
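# Illustrative sketch (not part of the original module): an LU can hand
# follow-up work to the job queue by returning ResultWithJobs from Exec.
# The opcode used below is only an example; extra keyword arguments become
# additional fields of the result.
#
#   def Exec(self, feedback_fn):
#     ...
#     jobs = [[opcodes.OpInstanceStartup(instance_name=name)]
#             for name in affected_instances]
#     return ResultWithJobs(jobs)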
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op validity.
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separately is better because:
176 - ExpandNames is left as a purely lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods no longer need to worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. If there are no nodes to return,
318 use an empty list (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the unused argument and "could
345 # be a function" warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
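# Illustrative sketch (not part of the original module): a typical
# instance-level LU calls the helper above from ExpandNames and defers node
# locking to DeclareLocks, e.g.:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE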
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instances' nodes, or
381 to just lock primary or secondary nodes, if needed.
383 It should be called from DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _AnnotateDiskParams(instance, devs, cfg):
589 """Little helper wrapper to the rpc annotation method.
591 @param instance: The instance object
592 @type devs: List of L{objects.Disk}
593 @param devs: The root devices (not any of its children!)
594 @param cfg: The config object
595 @return: The annotated disk copies
596 @see: L{rpc.AnnotateDiskParams}
599 return rpc.AnnotateDiskParams(instance.disk_template, devs,
600 cfg.GetInstanceDiskParams(instance))
603 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
605 """Checks if node groups for locked instances are still correct.
607 @type cfg: L{config.ConfigWriter}
608 @param cfg: Cluster configuration
609 @type instances: dict; string as key, L{objects.Instance} as value
610 @param instances: Dictionary, instance name as key, instance object as value
611 @type owned_groups: iterable of string
612 @param owned_groups: List of owned groups
613 @type owned_nodes: iterable of string
614 @param owned_nodes: List of owned nodes
615 @type cur_group_uuid: string or None
616 @param cur_group_uuid: Optional group UUID to check against instance's groups
619 for (name, inst) in instances.items():
620 assert owned_nodes.issuperset(inst.all_nodes), \
621 "Instance %s's nodes changed while we kept the lock" % name
623 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
625 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
626 "Instance %s has no node in group %s" % (name, cur_group_uuid)
629 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
631 """Checks if the owned node groups are still correct for an instance.
633 @type cfg: L{config.ConfigWriter}
634 @param cfg: The cluster configuration
635 @type instance_name: string
636 @param instance_name: Instance name
637 @type owned_groups: set or frozenset
638 @param owned_groups: List of currently owned node groups
639 @type primary_only: boolean
640 @param primary_only: Whether to check node groups for only the primary node
643 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
645 if not owned_groups.issuperset(inst_groups):
646 raise errors.OpPrereqError("Instance %s's node groups changed since"
647 " locks were acquired, current groups are"
648 " are '%s', owning groups '%s'; retry the"
651 utils.CommaJoin(inst_groups),
652 utils.CommaJoin(owned_groups)),
658 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
659 """Checks if the instances in a node group are still correct.
661 @type cfg: L{config.ConfigWriter}
662 @param cfg: The cluster configuration
663 @type group_uuid: string
664 @param group_uuid: Node group UUID
665 @type owned_instances: set or frozenset
666 @param owned_instances: List of currently owned instances
669 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
670 if owned_instances != wanted_instances:
671 raise errors.OpPrereqError("Instances in node group '%s' changed since"
672 " locks were acquired, wanted '%s', have '%s';"
673 " retry the operation" %
675 utils.CommaJoin(wanted_instances),
676 utils.CommaJoin(owned_instances)),
679 return wanted_instances
682 def _SupportsOob(cfg, node):
683 """Tells if node supports OOB.
685 @type cfg: L{config.ConfigWriter}
686 @param cfg: The cluster configuration
687 @type node: L{objects.Node}
688 @param node: The node
689 @return: The OOB script if supported or an empty string otherwise
692 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
695 def _CopyLockList(names):
696 """Makes a copy of a list of lock names.
698 Handles L{locking.ALL_SET} correctly.
701 if names == locking.ALL_SET:
702 return locking.ALL_SET
707 def _GetWantedNodes(lu, nodes):
708 """Returns list of checked and expanded node names.
710 @type lu: L{LogicalUnit}
711 @param lu: the logical unit on whose behalf we execute
713 @param nodes: list of node names or None for all nodes
715 @return: the list of nodes, sorted
716 @raise errors.ProgrammerError: if the nodes parameter is wrong type
720 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
722 return utils.NiceSort(lu.cfg.GetNodeList())
725 def _GetWantedInstances(lu, instances):
726 """Returns list of checked and expanded instance names.
728 @type lu: L{LogicalUnit}
729 @param lu: the logical unit on whose behalf we execute
730 @type instances: list
731 @param instances: list of instance names or None for all instances
733 @return: the list of instances, sorted
734 @raise errors.OpPrereqError: if the instances parameter is wrong type
735 @raise errors.OpPrereqError: if any of the passed instances is not found
739 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
741 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
745 def _GetUpdatedParams(old_params, update_dict,
746 use_default=True, use_none=False):
747 """Return the new version of a parameter dictionary.
749 @type old_params: dict
750 @param old_params: old parameters
751 @type update_dict: dict
752 @param update_dict: dict containing new parameter values, or
753 constants.VALUE_DEFAULT to reset the parameter to its default
755 @type use_default: boolean
756 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
757 values as 'to be deleted' values
758 @type use_none: boolean
759 @param use_none: whether to recognise C{None} values as 'to be
762 @return: the new parameter dictionary
765 params_copy = copy.deepcopy(old_params)
766 for key, val in update_dict.iteritems():
767 if ((use_default and val == constants.VALUE_DEFAULT) or
768 (use_none and val is None)):
774 params_copy[key] = val
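# Illustrative sketch (not part of the original module): behaviour of the
# merge above for a hypothetical parameter dict, with the default
# use_default=True:
#
#   old = {"memory": 128, "vcpus": 2}
#   _GetUpdatedParams(old, {"memory": constants.VALUE_DEFAULT, "vcpus": 4})
#   # -> {"vcpus": 4}   (VALUE_DEFAULT deletes the key, other values replace)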
778 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
779 """Return the new version of a instance policy.
781 @param group_policy: whether this policy applies to a group and thus
782 we should support removal of policy entries
785 use_none = use_default = group_policy
786 ipolicy = copy.deepcopy(old_ipolicy)
787 for key, value in new_ipolicy.items():
788 if key not in constants.IPOLICY_ALL_KEYS:
789 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
791 if key in constants.IPOLICY_ISPECS:
792 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
793 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
795 use_default=use_default)
797 if (not value or value == [constants.VALUE_DEFAULT] or
798 value == constants.VALUE_DEFAULT):
802 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
803 " on the cluster'" % key,
806 if key in constants.IPOLICY_PARAMETERS:
807 # FIXME: we assume all such values are float
809 ipolicy[key] = float(value)
810 except (TypeError, ValueError), err:
811 raise errors.OpPrereqError("Invalid value for attribute"
812 " '%s': '%s', error: %s" %
813 (key, value, err), errors.ECODE_INVAL)
815 # FIXME: we assume all others are lists; this should be redone
817 ipolicy[key] = list(value)
819 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
820 except errors.ConfigurationError, err:
821 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
826 def _UpdateAndVerifySubDict(base, updates, type_check):
827 """Updates and verifies a dict with sub dicts of the same type.
829 @param base: The dict with the old data
830 @param updates: The dict with the new data
831 @param type_check: Dict suitable to ForceDictType to verify correct types
832 @returns: A new dict with updated and verified values
836 new = _GetUpdatedParams(old, value)
837 utils.ForceDictType(new, type_check)
840 ret = copy.deepcopy(base)
841 ret.update(dict((key, fn(base.get(key, {}), value))
842 for key, value in updates.items()))
846 def _MergeAndVerifyHvState(op_input, obj_input):
847 """Combines the hv state from an opcode with the one of the object
849 @param op_input: The input dict from the opcode
850 @param obj_input: The input dict from the objects
851 @return: The verified and updated dict
855 invalid_hvs = set(op_input) - constants.HYPER_TYPES
857 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
858 " %s" % utils.CommaJoin(invalid_hvs),
860 if obj_input is None:
862 type_check = constants.HVSTS_PARAMETER_TYPES
863 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
868 def _MergeAndVerifyDiskState(op_input, obj_input):
869 """Combines the disk state from an opcode with the one of the object
871 @param op_input: The input dict from the opcode
872 @param obj_input: The input dict from the objects
873 @return: The verified and updated dict
876 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
878 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
879 utils.CommaJoin(invalid_dst),
881 type_check = constants.DSS_PARAMETER_TYPES
882 if obj_input is None:
884 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
886 for key, value in op_input.items())
891 def _ReleaseLocks(lu, level, names=None, keep=None):
892 """Releases locks owned by an LU.
894 @type lu: L{LogicalUnit}
895 @param level: Lock level
896 @type names: list or None
897 @param names: Names of locks to release
898 @type keep: list or None
899 @param keep: Names of locks to retain
902 assert not (keep is not None and names is not None), \
903 "Only one of the 'names' and the 'keep' parameters can be given"
905 if names is not None:
906 should_release = names.__contains__
908 should_release = lambda name: name not in keep
910 should_release = None
912 owned = lu.owned_locks(level)
914 # Not owning any lock at this level, do nothing
921 # Determine which locks to release
923 if should_release(name):
928 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
930 # Release just some locks
931 lu.glm.release(level, names=release)
933 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
936 lu.glm.release(level)
938 assert not lu.glm.is_owned(level), "No locks should be owned"
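# Illustrative sketch (not part of the original module): typical calls from an
# LU, with hypothetical node names. At most one of "names" and "keep" may be
# given; with neither, all locks at the level are released.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=["node1.example.com"])
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#   _ReleaseLocks(self, locking.LEVEL_NODE)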
941 def _MapInstanceDisksToNodes(instances):
942 """Creates a map from (node, volume) to instance name.
944 @type instances: list of L{objects.Instance}
945 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
948 return dict(((node, vol), inst.name)
949 for inst in instances
950 for (node, vols) in inst.MapLVsByNode().items() for vol in vols)
954 def _RunPostHook(lu, node_name):
955 """Runs the post-hook for an opcode on a single node.
958 hm = lu.proc.BuildHooksManager(lu)
960 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
961 except Exception, err: # pylint: disable=W0703
962 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
965 def _CheckOutputFields(static, dynamic, selected):
966 """Checks whether all selected fields are valid.
968 @type static: L{utils.FieldSet}
969 @param static: static fields set
970 @type dynamic: L{utils.FieldSet}
971 @param dynamic: dynamic fields set
978 delta = f.NonMatching(selected)
980 raise errors.OpPrereqError("Unknown output fields selected: %s"
981 % ",".join(delta), errors.ECODE_INVAL)
984 def _CheckGlobalHvParams(params):
985 """Validates that given hypervisor params are not global ones.
987 This will ensure that instances don't get customised versions of global parameters.
991 used_globals = constants.HVC_GLOBALS.intersection(params)
993 msg = ("The following hypervisor parameters are global and cannot"
994 " be customized at instance level, please modify them at"
995 " cluster level: %s" % utils.CommaJoin(used_globals))
996 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
999 def _CheckNodeOnline(lu, node, msg=None):
1000 """Ensure that a given node is online.
1002 @param lu: the LU on behalf of which we make the check
1003 @param node: the node to check
1004 @param msg: if passed, should be a message to replace the default one
1005 @raise errors.OpPrereqError: if the node is offline
1009 msg = "Can't use offline node"
1010 if lu.cfg.GetNodeInfo(node).offline:
1011 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1014 def _CheckNodeNotDrained(lu, node):
1015 """Ensure that a given node is not drained.
1017 @param lu: the LU on behalf of which we make the check
1018 @param node: the node to check
1019 @raise errors.OpPrereqError: if the node is drained
1022 if lu.cfg.GetNodeInfo(node).drained:
1023 raise errors.OpPrereqError("Can't use drained node %s" % node,
1027 def _CheckNodeVmCapable(lu, node):
1028 """Ensure that a given node is vm capable.
1030 @param lu: the LU on behalf of which we make the check
1031 @param node: the node to check
1032 @raise errors.OpPrereqError: if the node is not vm capable
1035 if not lu.cfg.GetNodeInfo(node).vm_capable:
1036 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1040 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1041 """Ensure that a node supports a given OS.
1043 @param lu: the LU on behalf of which we make the check
1044 @param node: the node to check
1045 @param os_name: the OS to query about
1046 @param force_variant: whether to ignore variant errors
1047 @raise errors.OpPrereqError: if the node is not supporting the OS
1050 result = lu.rpc.call_os_get(node, os_name)
1051 result.Raise("OS '%s' not in supported OS list for node %s" %
1053 prereq=True, ecode=errors.ECODE_INVAL)
1054 if not force_variant:
1055 _CheckOSVariant(result.payload, os_name)
1058 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1059 """Ensure that a node has the given secondary ip.
1061 @type lu: L{LogicalUnit}
1062 @param lu: the LU on behalf of which we make the check
1064 @param node: the node to check
1065 @type secondary_ip: string
1066 @param secondary_ip: the ip to check
1067 @type prereq: boolean
1068 @param prereq: whether to throw a prerequisite or an execute error
1069 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1070 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1073 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1074 result.Raise("Failure checking secondary ip on node %s" % node,
1075 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1076 if not result.payload:
1077 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1078 " please fix and re-run this command" % secondary_ip)
1080 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1082 raise errors.OpExecError(msg)
1085 def _GetClusterDomainSecret():
1086 """Reads the cluster domain secret.
1089 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1093 def _CheckInstanceState(lu, instance, req_states, msg=None):
1094 """Ensure that an instance is in one of the required states.
1096 @param lu: the LU on behalf of which we make the check
1097 @param instance: the instance to check
1098 @param msg: if passed, should be a message to replace the default one
1099 @raise errors.OpPrereqError: if the instance is not in the required state
1103 msg = ("can't use instance from outside %s states" %
1104 utils.CommaJoin(req_states))
1105 if instance.admin_state not in req_states:
1106 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1107 (instance.name, instance.admin_state, msg),
1110 if constants.ADMINST_UP not in req_states:
1111 pnode = instance.primary_node
1112 if not lu.cfg.GetNodeInfo(pnode).offline:
1113 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1114 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1115 prereq=True, ecode=errors.ECODE_ENVIRON)
1116 if instance.name in ins_l.payload:
1117 raise errors.OpPrereqError("Instance %s is running, %s" %
1118 (instance.name, msg), errors.ECODE_STATE)
1120 lu.LogWarning("Primary node offline, ignoring check that instance"
1124 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1125 """Computes if value is in the desired range.
1127 @param name: name of the parameter for which we perform the check
1128 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1130 @param ipolicy: dictionary containing min, max and std values
1131 @param value: actual value that we want to use
1132 @return: None or element not meeting the criteria
1136 if value in [None, constants.VALUE_AUTO]:
1138 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1139 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1140 if value > max_v or min_v > value:
1142 fqn = "%s/%s" % (name, qualifier)
1145 return ("%s value %s is not in range [%s, %s]" %
1146 (fqn, value, min_v, max_v))
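# Illustrative sketch (not part of the original module): with a hypothetical
# ipolicy whose memory-size bounds are min=128 and max=4096, a value inside
# the range yields None, one outside yields an error string:
#
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 512)   # None
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 8192)
#   # -> something like "memory-size value 8192 is not in range [128, 4096]"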
1150 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1151 nic_count, disk_sizes, spindle_use,
1152 _compute_fn=_ComputeMinMaxSpec):
1153 """Verifies ipolicy against provided specs.
1156 @param ipolicy: The ipolicy
1158 @param mem_size: The memory size
1159 @type cpu_count: int
1160 @param cpu_count: Used cpu cores
1161 @type disk_count: int
1162 @param disk_count: Number of disks used
1163 @type nic_count: int
1164 @param nic_count: Number of nics used
1165 @type disk_sizes: list of ints
1166 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1167 @type spindle_use: int
1168 @param spindle_use: The number of spindles this instance uses
1169 @param _compute_fn: The compute function (unittest only)
1170 @return: A list of violations, or an empty list if no violations are found
1173 assert disk_count == len(disk_sizes)
1176 (constants.ISPEC_MEM_SIZE, "", mem_size),
1177 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1178 (constants.ISPEC_DISK_COUNT, "", disk_count),
1179 (constants.ISPEC_NIC_COUNT, "", nic_count),
1180 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1181 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1182 for idx, d in enumerate(disk_sizes)]
1185 (_compute_fn(name, qualifier, ipolicy, value)
1186 for (name, qualifier, value) in test_settings))
1189 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1190 _compute_fn=_ComputeIPolicySpecViolation):
1191 """Compute if instance meets the specs of ipolicy.
1194 @param ipolicy: The ipolicy to verify against
1195 @type instance: L{objects.Instance}
1196 @param instance: The instance to verify
1197 @param _compute_fn: The function to verify ipolicy (unittest only)
1198 @see: L{_ComputeIPolicySpecViolation}
1201 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1202 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1203 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1204 disk_count = len(instance.disks)
1205 disk_sizes = [disk.size for disk in instance.disks]
1206 nic_count = len(instance.nics)
1208 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1209 disk_sizes, spindle_use)
1212 def _ComputeIPolicyInstanceSpecViolation(
1213 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1214 """Compute if instance specs meets the specs of ipolicy.
1217 @param ipolicy: The ipolicy to verify against
1218 @type instance_spec: dict
1219 @param instance_spec: The instance spec to verify
1220 @param _compute_fn: The function to verify ipolicy (unittest only)
1221 @see: L{_ComputeIPolicySpecViolation}
1224 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1225 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1226 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1227 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1228 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1229 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1231 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1232 disk_sizes, spindle_use)
1235 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1237 _compute_fn=_ComputeIPolicyInstanceViolation):
1238 """Compute if instance meets the specs of the new target group.
1240 @param ipolicy: The ipolicy to verify
1241 @param instance: The instance object to verify
1242 @param current_group: The current group of the instance
1243 @param target_group: The new group of the instance
1244 @param _compute_fn: The function to verify ipolicy (unittest only)
1245 @see: L{_ComputeIPolicySpecViolation}
1248 if current_group == target_group:
1251 return _compute_fn(ipolicy, instance)
1254 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1255 _compute_fn=_ComputeIPolicyNodeViolation):
1256 """Checks that the target node is correct in terms of instance policy.
1258 @param ipolicy: The ipolicy to verify
1259 @param instance: The instance object to verify
1260 @param node: The new node to relocate
1261 @param ignore: Ignore violations of the ipolicy
1262 @param _compute_fn: The function to verify ipolicy (unittest only)
1263 @see: L{_ComputeIPolicySpecViolation}
1266 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1267 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1270 msg = ("Instance does not meet target node group's (%s) instance"
1271 " policy: %s") % (node.group, utils.CommaJoin(res))
1275 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1278 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1279 """Computes a set of any instances that would violate the new ipolicy.
1281 @param old_ipolicy: The current (still in-place) ipolicy
1282 @param new_ipolicy: The new (to become) ipolicy
1283 @param instances: List of instances to verify
1284 @return: A list of instances which violate the new ipolicy but
1288 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1289 _ComputeViolatingInstances(old_ipolicy, instances))
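# Illustrative sketch (not part of the original module): instances already
# violating the old policy are excluded, so only newly introduced violations
# are reported, e.g.:
#
#   new_violations = _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy,
#                                                  instances)
#   if new_violations:
#     lu.LogWarning("Instances now violating the policy: %s",
#                   utils.CommaJoin(new_violations))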
1292 def _ExpandItemName(fn, name, kind):
1293 """Expand an item name.
1295 @param fn: the function to use for expansion
1296 @param name: requested item name
1297 @param kind: text description ('Node' or 'Instance')
1298 @return: the resolved (full) name
1299 @raise errors.OpPrereqError: if the item is not found
1302 full_name = fn(name)
1303 if full_name is None:
1304 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1309 def _ExpandNodeName(cfg, name):
1310 """Wrapper over L{_ExpandItemName} for nodes."""
1311 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1314 def _ExpandInstanceName(cfg, name):
1315 """Wrapper over L{_ExpandItemName} for instance."""
1316 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1319 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1320 network_type, mac_prefix, tags):
1321 """Builds network related env variables for hooks
1323 This builds the hook environment from individual variables.
1326 @param name: the name of the network
1327 @type subnet: string
1328 @param subnet: the ipv4 subnet
1329 @type gateway: string
1330 @param gateway: the ipv4 gateway
1331 @type network6: string
1332 @param network6: the ipv6 subnet
1333 @type gateway6: string
1334 @param gateway6: the ipv6 gateway
1335 @type network_type: string
1336 @param network_type: the type of the network
1337 @type mac_prefix: string
1338 @param mac_prefix: the mac_prefix
1340 @param tags: the tags of the network
1345 env["NETWORK_NAME"] = name
1347 env["NETWORK_SUBNET"] = subnet
1349 env["NETWORK_GATEWAY"] = gateway
1351 env["NETWORK_SUBNET6"] = network6
1353 env["NETWORK_GATEWAY6"] = gateway6
1355 env["NETWORK_MAC_PREFIX"] = mac_prefix
1357 env["NETWORK_TYPE"] = network_type
1359 env["NETWORK_TAGS"] = " ".join(tags)
1364 def _BuildNetworkHookEnvByObject(net):
1365 """Builds network related env varliables for hooks
1367 @type net: L{objects.Network}
1368 @param net: the network object
1373 "subnet": net.network,
1374 "gateway": net.gateway,
1375 "network6": net.network6,
1376 "gateway6": net.gateway6,
1377 "network_type": net.network_type,
1378 "mac_prefix": net.mac_prefix,
1382 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
1385 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1386 minmem, maxmem, vcpus, nics, disk_template, disks,
1387 bep, hvp, hypervisor_name, tags):
1388 """Builds instance related env variables for hooks
1390 This builds the hook environment from individual variables.
1393 @param name: the name of the instance
1394 @type primary_node: string
1395 @param primary_node: the name of the instance's primary node
1396 @type secondary_nodes: list
1397 @param secondary_nodes: list of secondary nodes as strings
1398 @type os_type: string
1399 @param os_type: the name of the instance's OS
1400 @type status: string
1401 @param status: the desired status of the instance
1402 @type minmem: string
1403 @param minmem: the minimum memory size of the instance
1404 @type maxmem: string
1405 @param maxmem: the maximum memory size of the instance
1407 @param vcpus: the count of VCPUs the instance has
1409 @param nics: list of tuples (ip, mac, mode, link, network) representing
1410 the NICs the instance has
1411 @type disk_template: string
1412 @param disk_template: the disk template of the instance
1414 @param disks: the list of (size, mode) pairs
1416 @param bep: the backend parameters for the instance
1418 @param hvp: the hypervisor parameters for the instance
1419 @type hypervisor_name: string
1420 @param hypervisor_name: the hypervisor for the instance
1422 @param tags: list of instance tags as strings
1424 @return: the hook environment for this instance
1429 "INSTANCE_NAME": name,
1430 "INSTANCE_PRIMARY": primary_node,
1431 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1432 "INSTANCE_OS_TYPE": os_type,
1433 "INSTANCE_STATUS": status,
1434 "INSTANCE_MINMEM": minmem,
1435 "INSTANCE_MAXMEM": maxmem,
1436 # TODO(2.7) remove deprecated "memory" value
1437 "INSTANCE_MEMORY": maxmem,
1438 "INSTANCE_VCPUS": vcpus,
1439 "INSTANCE_DISK_TEMPLATE": disk_template,
1440 "INSTANCE_HYPERVISOR": hypervisor_name,
1443 nic_count = len(nics)
1444 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1447 env["INSTANCE_NIC%d_IP" % idx] = ip
1448 env["INSTANCE_NIC%d_MAC" % idx] = mac
1449 env["INSTANCE_NIC%d_MODE" % idx] = mode
1450 env["INSTANCE_NIC%d_LINK" % idx] = link
1452 env["INSTANCE_NIC%d_NETWORK" % idx] = net
1454 nobj = objects.Network.FromDict(netinfo)
1456 env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
1458 env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
1460 env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
1462 env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
1464 env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
1465 if nobj.network_type:
1466 env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
1468 env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
1469 if mode == constants.NIC_MODE_BRIDGED:
1470 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1474 env["INSTANCE_NIC_COUNT"] = nic_count
1477 disk_count = len(disks)
1478 for idx, (size, mode) in enumerate(disks):
1479 env["INSTANCE_DISK%d_SIZE" % idx] = size
1480 env["INSTANCE_DISK%d_MODE" % idx] = mode
1484 env["INSTANCE_DISK_COUNT"] = disk_count
1489 env["INSTANCE_TAGS"] = " ".join(tags)
1491 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1492 for key, value in source.items():
1493 env["INSTANCE_%s_%s" % (kind, key)] = value
1498 def _NICToTuple(lu, nic):
1499 """Build a tupple of nic information.
1501 @type lu: L{LogicalUnit}
1502 @param lu: the logical unit on whose behalf we execute
1503 @type nic: L{objects.NIC}
1504 @param nic: nic to convert to hooks tuple
1509 cluster = lu.cfg.GetClusterInfo()
1510 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1511 mode = filled_params[constants.NIC_MODE]
1512 link = filled_params[constants.NIC_LINK]
1516 net_uuid = lu.cfg.LookupNetwork(net)
1518 nobj = lu.cfg.GetNetwork(net_uuid)
1519 netinfo = objects.Network.ToDict(nobj)
1520 return (ip, mac, mode, link, net, netinfo)
1523 def _NICListToTuple(lu, nics):
1524 """Build a list of nic information tuples.
1526 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1527 value in LUInstanceQueryData.
1529 @type lu: L{LogicalUnit}
1530 @param lu: the logical unit on whose behalf we execute
1531 @type nics: list of L{objects.NIC}
1532 @param nics: list of nics to convert to hooks tuples
1537 hooks_nics.append(_NICToTuple(lu, nic))
1541 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1542 """Builds instance related env variables for hooks from an object.
1544 @type lu: L{LogicalUnit}
1545 @param lu: the logical unit on whose behalf we execute
1546 @type instance: L{objects.Instance}
1547 @param instance: the instance for which we should build the
1549 @type override: dict
1550 @param override: dictionary with key/values that will override
1553 @return: the hook environment dictionary
1556 cluster = lu.cfg.GetClusterInfo()
1557 bep = cluster.FillBE(instance)
1558 hvp = cluster.FillHV(instance)
1560 "name": instance.name,
1561 "primary_node": instance.primary_node,
1562 "secondary_nodes": instance.secondary_nodes,
1563 "os_type": instance.os,
1564 "status": instance.admin_state,
1565 "maxmem": bep[constants.BE_MAXMEM],
1566 "minmem": bep[constants.BE_MINMEM],
1567 "vcpus": bep[constants.BE_VCPUS],
1568 "nics": _NICListToTuple(lu, instance.nics),
1569 "disk_template": instance.disk_template,
1570 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1573 "hypervisor_name": instance.hypervisor,
1574 "tags": instance.tags,
1577 args.update(override)
1578 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
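# Illustrative sketch (not part of the original module): LUs typically build
# their hook environment from the instance object and, where needed, override
# selected keys (the override below is only an example):
#
#   env = _BuildInstanceHookEnvByObject(self, self.instance,
#                                       override={"status": "down"})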
1581 def _AdjustCandidatePool(lu, exceptions):
1582 """Adjust the candidate pool after node operations.
1585 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1587 lu.LogInfo("Promoted nodes to master candidate role: %s",
1588 utils.CommaJoin(node.name for node in mod_list))
1589 for name in mod_list:
1590 lu.context.ReaddNode(name)
1591 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1593 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1597 def _DecideSelfPromotion(lu, exceptions=None):
1598 """Decide whether I should promote myself as a master candidate.
1601 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1602 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1603 # the new node will increase mc_max with one, so:
1604 mc_should = min(mc_should + 1, cp_size)
1605 return mc_now < mc_should
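# Illustrative sketch (not part of the original module): with a candidate pool
# size of 10, 3 current candidates and 4 nodes that currently should be
# candidates, adding the new node raises the target to min(4 + 1, 10) = 5;
# since 3 < 5, the node decides to promote itself.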
1608 def _ComputeViolatingInstances(ipolicy, instances):
1609 """Computes a set of instances who violates given ipolicy.
1611 @param ipolicy: The ipolicy to verify
1612 @type instances: list of L{objects.Instance}
1613 @param instances: List of instances to verify
1614 @return: A frozenset of instance names violating the ipolicy
1617 return frozenset([inst.name for inst in instances
1618 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1621 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1622 """Check that the brigdes needed by a list of nics exist.
1625 cluster = lu.cfg.GetClusterInfo()
1626 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1627 brlist = [params[constants.NIC_LINK] for params in paramslist
1628 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1630 result = lu.rpc.call_bridges_exist(target_node, brlist)
1631 result.Raise("Error checking bridges on destination node '%s'" %
1632 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1635 def _CheckInstanceBridgesExist(lu, instance, node=None):
1636 """Check that the brigdes needed by an instance exist.
1640 node = instance.primary_node
1641 _CheckNicsBridgesExist(lu, instance.nics, node)
1644 def _CheckOSVariant(os_obj, name):
1645 """Check whether an OS name conforms to the os variants specification.
1647 @type os_obj: L{objects.OS}
1648 @param os_obj: OS object to check
1650 @param name: OS name passed by the user, to check for validity
1653 variant = objects.OS.GetVariant(name)
1654 if not os_obj.supported_variants:
1656 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1657 " passed)" % (os_obj.name, variant),
1661 raise errors.OpPrereqError("OS name must include a variant",
1664 if variant not in os_obj.supported_variants:
1665 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1668 def _GetNodeInstancesInner(cfg, fn):
1669 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1672 def _GetNodeInstances(cfg, node_name):
1673 """Returns a list of all primary and secondary instances on a node.
1677 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1680 def _GetNodePrimaryInstances(cfg, node_name):
1681 """Returns primary instances on a node.
1684 return _GetNodeInstancesInner(cfg,
1685 lambda inst: node_name == inst.primary_node)
1688 def _GetNodeSecondaryInstances(cfg, node_name):
1689 """Returns secondary instances on a node.
1692 return _GetNodeInstancesInner(cfg,
1693 lambda inst: node_name in inst.secondary_nodes)
1696 def _GetStorageTypeArgs(cfg, storage_type):
1697 """Returns the arguments for a storage type.
1700 # Special case for file storage
1701 if storage_type == constants.ST_FILE:
1702 # storage.FileStorage wants a list of storage directories
1703 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1708 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1711 for dev in instance.disks:
1712 cfg.SetDiskID(dev, node_name)
1714 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1716 result.Raise("Failed to get disk status from node %s" % node_name,
1717 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1719 for idx, bdev_status in enumerate(result.payload):
1720 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1726 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1727 """Check the sanity of iallocator and node arguments and use the
1728 cluster-wide iallocator if appropriate.
1730 Check that at most one of (iallocator, node) is specified. If none is
1731 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1732 then the LU's opcode's iallocator slot is filled with the cluster-wide
1735 @type iallocator_slot: string
1736 @param iallocator_slot: the name of the opcode iallocator slot
1737 @type node_slot: string
1738 @param node_slot: the name of the opcode target node slot
1741 node = getattr(lu.op, node_slot, None)
1742 ialloc = getattr(lu.op, iallocator_slot, None)
1746 if node is not None and ialloc is not None:
1747 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1749 elif ((node is None and ialloc is None) or
1750 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1751 default_iallocator = lu.cfg.GetDefaultIAllocator()
1752 if default_iallocator:
1753 setattr(lu.op, iallocator_slot, default_iallocator)
1755 raise errors.OpPrereqError("No iallocator or node given and no"
1756 " cluster-wide default iallocator found;"
1757 " please specify either an iallocator or a"
1758 " node, or set a cluster-wide default"
1759 " iallocator", errors.ECODE_INVAL)
1762 def _GetDefaultIAllocator(cfg, ialloc):
1763 """Decides on which iallocator to use.
1765 @type cfg: L{config.ConfigWriter}
1766 @param cfg: Cluster configuration object
1767 @type ialloc: string or None
1768 @param ialloc: Iallocator specified in opcode
1770 @return: Iallocator name
1774 # Use default iallocator
1775 ialloc = cfg.GetDefaultIAllocator()
1778 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1779 " opcode nor as a cluster-wide default",
1785 def _CheckHostnameSane(lu, name):
1786 """Ensures that a given hostname resolves to a 'sane' name.
1788 The given name is required to be a prefix of the resolved hostname,
1789 to prevent accidental mismatches.
1791 @param lu: the logical unit on behalf of which we're checking
1792 @param name: the name we should resolve and check
1793 @return: the resolved hostname object
1796 hostname = netutils.GetHostname(name=name)
1797 if hostname.name != name:
1798 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1799 if not utils.MatchNameComponent(name, [hostname.name]):
1800 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1801 " same as given hostname '%s'") %
1802 (hostname.name, name), errors.ECODE_INVAL)
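# Illustrative sketch (not part of the original module): the prefix check
# above accepts a short name resolving to its own FQDN, but rejects
# mismatches, e.g. (names are hypothetical):
#
#   _CheckHostnameSane(lu, "inst1")   # ok if it resolves to inst1.example.com
#   _CheckHostnameSane(lu, "other")   # raises OpPrereqError if "other"
#                                     # resolves to a non-matching name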
1806 class LUClusterPostInit(LogicalUnit):
1807 """Logical unit for running hooks after cluster initialization.
1810 HPATH = "cluster-init"
1811 HTYPE = constants.HTYPE_CLUSTER
1813 def BuildHooksEnv(self):
1818 "OP_TARGET": self.cfg.GetClusterName(),
1821 def BuildHooksNodes(self):
1822 """Build hooks nodes.
1825 return ([], [self.cfg.GetMasterNode()])
1827 def Exec(self, feedback_fn):
1834 class LUClusterDestroy(LogicalUnit):
1835 """Logical unit for destroying the cluster.
1838 HPATH = "cluster-destroy"
1839 HTYPE = constants.HTYPE_CLUSTER
1841 def BuildHooksEnv(self):
1846 "OP_TARGET": self.cfg.GetClusterName(),
1849 def BuildHooksNodes(self):
1850 """Build hooks nodes.
1855 def CheckPrereq(self):
1856 """Check prerequisites.
1858 This checks whether the cluster is empty.
1860 Any errors are signaled by raising errors.OpPrereqError.
1863 master = self.cfg.GetMasterNode()
1865 nodelist = self.cfg.GetNodeList()
1866 if len(nodelist) != 1 or nodelist[0] != master:
1867 raise errors.OpPrereqError("There are still %d node(s) in"
1868 " this cluster." % (len(nodelist) - 1),
1870 instancelist = self.cfg.GetInstanceList()
1872 raise errors.OpPrereqError("There are still %d instance(s) in"
1873 " this cluster." % len(instancelist),
1876 def Exec(self, feedback_fn):
1877 """Destroys the cluster.
1880 master_params = self.cfg.GetMasterNetworkParameters()
1882 # Run post hooks on master node before it's removed
1883 _RunPostHook(self, master_params.name)
1885 ems = self.cfg.GetUseExternalMipScript()
1886 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1889 self.LogWarning("Error disabling the master IP address: %s",
1892 return master_params.name
1895 def _VerifyCertificate(filename):
1896 """Verifies a certificate for L{LUClusterVerifyConfig}.
1898 @type filename: string
1899 @param filename: Path to PEM file
1903 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1904 utils.ReadFile(filename))
1905 except Exception, err: # pylint: disable=W0703
1906 return (LUClusterVerifyConfig.ETYPE_ERROR,
1907 "Failed to load X509 certificate %s: %s" % (filename, err))
1910 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1911 constants.SSL_CERT_EXPIRATION_ERROR)
1914 fnamemsg = "While verifying %s: %s" % (filename, msg)
1919 return (None, fnamemsg)
1920 elif errcode == utils.CERT_WARNING:
1921 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1922 elif errcode == utils.CERT_ERROR:
1923 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1925 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1928 def _GetAllHypervisorParameters(cluster, instances):
1929 """Compute the set of all hypervisor parameters.
1931 @type cluster: L{objects.Cluster}
1932 @param cluster: the cluster object
1933 @param instances: list of L{objects.Instance}
1934 @param instances: additional instances from which to obtain parameters
1935 @rtype: list of (origin, hypervisor, parameters)
1936 @return: a list with all parameters found, indicating the hypervisor they
1937 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1942 for hv_name in cluster.enabled_hypervisors:
1943 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1945 for os_name, os_hvp in cluster.os_hvp.items():
1946 for hv_name, hv_params in os_hvp.items():
1948 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1949 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1951 # TODO: collapse identical parameter values in a single one
1952 for instance in instances:
1953 if instance.hvparams:
1954 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1955 cluster.FillHV(instance)))
1960 class _VerifyErrors(object):
1961 """Mix-in for cluster/group verify LUs.
1963 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1964 self.op and self._feedback_fn to be available.)
1968 ETYPE_FIELD = "code"
1969 ETYPE_ERROR = "ERROR"
1970 ETYPE_WARNING = "WARNING"
1972 def _Error(self, ecode, item, msg, *args, **kwargs):
1973 """Format an error message.
1975 Based on the opcode's error_codes parameter, either format a
1976 parseable error code, or a simpler error string.
1978 This must be called only from Exec and functions called from Exec.
1981 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1982 itype, etxt, _ = ecode
1983 # first complete the msg
1986 # then format the whole message
1987 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1988 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1994 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1995 # and finally report it via the feedback_fn
1996 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1998 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1999 """Log an error message if the passed condition is True.
2003 or self.op.debug_simulate_errors) # pylint: disable=E1101
2005 # If the error code is in the list of ignored errors, demote the error to a
2007 (_, etxt, _) = ecode
2008 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2009 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
2012 self._Error(ecode, *args, **kwargs)
2014 # do not mark the operation as failed for WARN cases only
2015 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
2016 self.bad = self.bad or cond
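# Illustrative sketch (not part of Ganeti): the two output shapes produced by
# _Error above, shown as a stand-alone helper on fabricated values. With
# error_codes enabled the line is machine-parseable
# (level:code:category:item:message); otherwise a human-readable form is used.
def _ExampleFormatVerifyMessage(parseable, ltype, etxt, itype, item, msg):
  if parseable:
    return "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
  item = " " + item if item else ""
  return "%s: %s%s: %s" % (ltype, itype, item, msg)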
2019 class LUClusterVerify(NoHooksLU):
2020 """Submits all jobs necessary to verify the cluster.
2025 def ExpandNames(self):
2026 self.needed_locks = {}
2028 def Exec(self, feedback_fn):
2031 if self.op.group_name:
2032 groups = [self.op.group_name]
2033 depends_fn = lambda: None
2035 groups = self.cfg.GetNodeGroupList()
2037 # Verify global configuration
2039 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2042 # Always depend on global verification
2043 depends_fn = lambda: [(-len(jobs), [])]
2046 [opcodes.OpClusterVerifyGroup(group_name=group,
2047 ignore_errors=self.op.ignore_errors,
2048 depends=depends_fn())]
2049 for group in groups)
2051 # Fix up all parameters
2052 for op in itertools.chain(*jobs): # pylint: disable=W0142
2053 op.debug_simulate_errors = self.op.debug_simulate_errors
2054 op.verbose = self.op.verbose
2055 op.error_codes = self.op.error_codes
2057 op.skip_checks = self.op.skip_checks
2058 except AttributeError:
2059 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2061 return ResultWithJobs(jobs)
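# Illustrative sketch (not part of Ganeti): the relative job dependency used
# above. Each per-group job is given depends=[(-len(jobs), [])] at the moment
# it is appended, so the negative offset, counted back from the job's own
# position in the submission, resolves to the configuration-verification job
# submitted first. The helper below reproduces that arithmetic on plain
# lists; the job "names" are fabricated strings.
def _ExampleRelativeDepends(group_names):
  jobs = [("verify-config", None)]
  for group in group_names:
    # the new job's position is len(jobs), so -len(jobs) points back to
    # position 0, i.e. the config-verification job
    jobs.append(("verify-group %s" % group, [(-len(jobs), [])]))
  return jobs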
2064 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2065 """Verifies the cluster config.
2070 def _VerifyHVP(self, hvp_data):
2071 """Verifies locally the syntax of the hypervisor parameters.
2074 for item, hv_name, hv_params in hvp_data:
2075 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2078 hv_class = hypervisor.GetHypervisor(hv_name)
2079 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2080 hv_class.CheckParameterSyntax(hv_params)
2081 except errors.GenericError, err:
2082 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2084 def ExpandNames(self):
2085 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2086 self.share_locks = _ShareAll()
2088 def CheckPrereq(self):
2089 """Check prerequisites.
2092 # Retrieve all information
2093 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2094 self.all_node_info = self.cfg.GetAllNodesInfo()
2095 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2097 def Exec(self, feedback_fn):
2098 """Verify integrity of cluster, performing various tests on nodes.
2102 self._feedback_fn = feedback_fn
2104 feedback_fn("* Verifying cluster config")
2106 for msg in self.cfg.VerifyConfig():
2107 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2109 feedback_fn("* Verifying cluster certificate files")
2111 for cert_filename in pathutils.ALL_CERT_FILES:
2112 (errcode, msg) = _VerifyCertificate(cert_filename)
2113 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2115 feedback_fn("* Verifying hypervisor parameters")
2117 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2118 self.all_inst_info.values()))
2120 feedback_fn("* Verifying all nodes belong to an existing group")
2122 # We do this verification here because, should this bogus circumstance
2123 # occur, it would never be caught by VerifyGroup, which only acts on
2124 # nodes/instances reachable from existing node groups.
2126 dangling_nodes = set(node.name for node in self.all_node_info.values()
2127 if node.group not in self.all_group_info)
2129 dangling_instances = {}
2130 no_node_instances = []
2132 for inst in self.all_inst_info.values():
2133 if inst.primary_node in dangling_nodes:
2134 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2135 elif inst.primary_node not in self.all_node_info:
2136 no_node_instances.append(inst.name)
2141 utils.CommaJoin(dangling_instances.get(node.name,
2143 for node in dangling_nodes]
2145 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2147 "the following nodes (and their instances) belong to a non"
2148 " existing group: %s", utils.CommaJoin(pretty_dangling))
2150 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2152 "the following instances have a non-existing primary-node:"
2153 " %s", utils.CommaJoin(no_node_instances))
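# Illustrative sketch (not part of Ganeti): the dangling-node and
# dangling-instance detection above, reduced to plain dictionaries. A node is
# dangling when it references a group that no longer exists; an instance
# whose primary node is unknown is reported separately. The argument names
# are assumptions made for the example.
def _ExampleFindDangling(node_groups, instance_pnodes, existing_groups):
  """node_groups: node -> group, instance_pnodes: instance -> primary node."""
  dangling_nodes = set(node for (node, group) in node_groups.items()
                       if group not in existing_groups)
  no_node_instances = [inst for (inst, pnode) in instance_pnodes.items()
                       if pnode not in node_groups]
  return (dangling_nodes, no_node_instances)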
2158 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2159 """Verifies the status of a node group.
2162 HPATH = "cluster-verify"
2163 HTYPE = constants.HTYPE_CLUSTER
2166 _HOOKS_INDENT_RE = re.compile("^", re.M)
2168 class NodeImage(object):
2169 """A class representing the logical and physical status of a node.
2172 @ivar name: the node name to which this object refers
2173 @ivar volumes: a structure as returned from
2174 L{ganeti.backend.GetVolumeList} (runtime)
2175 @ivar instances: a list of running instances (runtime)
2176 @ivar pinst: list of configured primary instances (config)
2177 @ivar sinst: list of configured secondary instances (config)
2178 @ivar sbp: dictionary of {primary-node: list of instances} for all
2179 instances for which this node is secondary (config)
2180 @ivar mfree: free memory, as reported by hypervisor (runtime)
2181 @ivar dfree: free disk, as reported by the node (runtime)
2182 @ivar offline: the offline status (config)
2183 @type rpc_fail: boolean
2184 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2185 not whether the individual keys were correct) (runtime)
2186 @type lvm_fail: boolean
2187 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2188 @type hyp_fail: boolean
2189 @ivar hyp_fail: whether the RPC call didn't return the instance list
2190 @type ghost: boolean
2191 @ivar ghost: whether this is a known node or not (config)
2192 @type os_fail: boolean
2193 @ivar os_fail: whether the RPC call didn't return valid OS data
2195 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2196 @type vm_capable: boolean
2197 @ivar vm_capable: whether the node can host instances
2200 def __init__(self, offline=False, name=None, vm_capable=True):
2209 self.offline = offline
2210 self.vm_capable = vm_capable
2211 self.rpc_fail = False
2212 self.lvm_fail = False
2213 self.hyp_fail = False
2215 self.os_fail = False
2218 def ExpandNames(self):
2219 # This raises errors.OpPrereqError on its own:
2220 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2222 # Get instances in node group; this is unsafe and needs verification later
2224 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2226 self.needed_locks = {
2227 locking.LEVEL_INSTANCE: inst_names,
2228 locking.LEVEL_NODEGROUP: [self.group_uuid],
2229 locking.LEVEL_NODE: [],
2232 self.share_locks = _ShareAll()
2234 def DeclareLocks(self, level):
2235 if level == locking.LEVEL_NODE:
2236 # Get members of node group; this is unsafe and needs verification later
2237 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2239 all_inst_info = self.cfg.GetAllInstancesInfo()
2241 # In Exec(), we warn about mirrored instances that have primary and
2242 # secondary living in separate node groups. To fully verify that
2243 # volumes for these instances are healthy, we will need to do an
2244 # extra call to their secondaries. We ensure here those nodes will
2246 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2247 # Important: access only the instances whose lock is owned
2248 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2249 nodes.update(all_inst_info[inst].secondary_nodes)
2251 self.needed_locks[locking.LEVEL_NODE] = nodes
2253 def CheckPrereq(self):
2254 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2255 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2257 group_nodes = set(self.group_info.members)
2259 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2262 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2264 unlocked_instances = \
2265 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2268 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2269 utils.CommaJoin(unlocked_nodes),
2272 if unlocked_instances:
2273 raise errors.OpPrereqError("Missing lock for instances: %s" %
2274 utils.CommaJoin(unlocked_instances),
2277 self.all_node_info = self.cfg.GetAllNodesInfo()
2278 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2280 self.my_node_names = utils.NiceSort(group_nodes)
2281 self.my_inst_names = utils.NiceSort(group_instances)
2283 self.my_node_info = dict((name, self.all_node_info[name])
2284 for name in self.my_node_names)
2286 self.my_inst_info = dict((name, self.all_inst_info[name])
2287 for name in self.my_inst_names)
2289 # We detect here the nodes that will need the extra RPC calls for verifying
2290 # split LV volumes; they should be locked.
2291 extra_lv_nodes = set()
2293 for inst in self.my_inst_info.values():
2294 if inst.disk_template in constants.DTS_INT_MIRROR:
2295 for nname in inst.all_nodes:
2296 if self.all_node_info[nname].group != self.group_uuid:
2297 extra_lv_nodes.add(nname)
2299 unlocked_lv_nodes = \
2300 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2302 if unlocked_lv_nodes:
2303 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2304 utils.CommaJoin(unlocked_lv_nodes),
2306 self.extra_lv_nodes = list(extra_lv_nodes)
2308 def _VerifyNode(self, ninfo, nresult):
2309 """Perform some basic validation on data returned from a node.
2311 - check the result data structure is well formed and has all the
2313 - check ganeti version
2315 @type ninfo: L{objects.Node}
2316 @param ninfo: the node to check
2317 @param nresult: the results from the node
2319 @return: whether overall this call was successful (and we can expect
2320 reasonable values in the response)
2324 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2326 # main result, nresult should be a non-empty dict
2327 test = not nresult or not isinstance(nresult, dict)
2328 _ErrorIf(test, constants.CV_ENODERPC, node,
2329 "unable to verify node: no data returned")
2333 # compares ganeti version
2334 local_version = constants.PROTOCOL_VERSION
2335 remote_version = nresult.get("version", None)
2336 test = not (remote_version and
2337 isinstance(remote_version, (list, tuple)) and
2338 len(remote_version) == 2)
2339 _ErrorIf(test, constants.CV_ENODERPC, node,
2340 "connection to node returned invalid data")
2344 test = local_version != remote_version[0]
2345 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2346 "incompatible protocol versions: master %s,"
2347 " node %s", local_version, remote_version[0])
2351 # node seems compatible, we can actually try to look into its results
2353 # full package version
2354 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2355 constants.CV_ENODEVERSION, node,
2356 "software version mismatch: master %s, node %s",
2357 constants.RELEASE_VERSION, remote_version[1],
2358 code=self.ETYPE_WARNING)
2360 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2361 if ninfo.vm_capable and isinstance(hyp_result, dict):
2362 for hv_name, hv_result in hyp_result.iteritems():
2363 test = hv_result is not None
2364 _ErrorIf(test, constants.CV_ENODEHV, node,
2365 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2367 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2368 if ninfo.vm_capable and isinstance(hvp_result, list):
2369 for item, hv_name, hv_result in hvp_result:
2370 _ErrorIf(True, constants.CV_ENODEHV, node,
2371 "hypervisor %s parameter verify failure (source %s): %s",
2372 hv_name, item, hv_result)
2374 test = nresult.get(constants.NV_NODESETUP,
2375 ["Missing NODESETUP results"])
2376 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2381 def _VerifyNodeTime(self, ninfo, nresult,
2382 nvinfo_starttime, nvinfo_endtime):
2383 """Check the node time.
2385 @type ninfo: L{objects.Node}
2386 @param ninfo: the node to check
2387 @param nresult: the remote results for the node
2388 @param nvinfo_starttime: the start time of the RPC call
2389 @param nvinfo_endtime: the end time of the RPC call
2393 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2395 ntime = nresult.get(constants.NV_TIME, None)
2397 ntime_merged = utils.MergeTime(ntime)
2398 except (ValueError, TypeError):
2399 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2402 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2403 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2404 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2405 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2409 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2410 "Node time diverges by at least %s from master node time",
2413 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2414 """Check the node LVM results.
2416 @type ninfo: L{objects.Node}
2417 @param ninfo: the node to check
2418 @param nresult: the remote results for the node
2419 @param vg_name: the configured VG name
2426 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2428 # checks vg existence and size > 20G
2429 vglist = nresult.get(constants.NV_VGLIST, None)
2431 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2433 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2434 constants.MIN_VG_SIZE)
2435 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2438 pvlist = nresult.get(constants.NV_PVLIST, None)
2439 test = pvlist is None
2440 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2442 # check that ':' is not present in PV names, since it's a
2443 # special character for lvcreate (denotes the range of PEs to
2445 for _, pvname, owner_vg in pvlist:
2446 test = ":" in pvname
2447 _ErrorIf(test, constants.CV_ENODELVM, node,
2448 "Invalid character ':' in PV '%s' of VG '%s'",
2451 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2452 """Check the node bridges.
2454 @type ninfo: L{objects.Node}
2455 @param ninfo: the node to check
2456 @param nresult: the remote results for the node
2457 @param bridges: the expected list of bridges
2464 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2466 missing = nresult.get(constants.NV_BRIDGES, None)
2467 test = not isinstance(missing, list)
2468 _ErrorIf(test, constants.CV_ENODENET, node,
2469 "did not return valid bridge information")
2471 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2472 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2474 def _VerifyNodeUserScripts(self, ninfo, nresult):
2475 """Check the presence and executability of user scripts on the node
2477 @type ninfo: L{objects.Node}
2478 @param ninfo: the node to check
2479 @param nresult: the remote results for the node
2484 test = not constants.NV_USERSCRIPTS in nresult
2485 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2486 "did not return user scripts information")
2488 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2490 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2491 "user scripts not present or not executable: %s" %
2492 utils.CommaJoin(sorted(broken_scripts)))
2494 def _VerifyNodeNetwork(self, ninfo, nresult):
2495 """Check the node network connectivity results.
2497 @type ninfo: L{objects.Node}
2498 @param ninfo: the node to check
2499 @param nresult: the remote results for the node
2503 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2505 test = constants.NV_NODELIST not in nresult
2506 _ErrorIf(test, constants.CV_ENODESSH, node,
2507 "node hasn't returned node ssh connectivity data")
2509 if nresult[constants.NV_NODELIST]:
2510 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2511 _ErrorIf(True, constants.CV_ENODESSH, node,
2512 "ssh communication with node '%s': %s", a_node, a_msg)
2514 test = constants.NV_NODENETTEST not in nresult
2515 _ErrorIf(test, constants.CV_ENODENET, node,
2516 "node hasn't returned node tcp connectivity data")
2518 if nresult[constants.NV_NODENETTEST]:
2519 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2521 _ErrorIf(True, constants.CV_ENODENET, node,
2522 "tcp communication with node '%s': %s",
2523 anode, nresult[constants.NV_NODENETTEST][anode])
2525 test = constants.NV_MASTERIP not in nresult
2526 _ErrorIf(test, constants.CV_ENODENET, node,
2527 "node hasn't returned node master IP reachability data")
2529 if not nresult[constants.NV_MASTERIP]:
2530 if node == self.master_node:
2531 msg = "the master node cannot reach the master IP (not configured?)"
2533 msg = "cannot reach the master IP"
2534 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2536 def _VerifyInstance(self, instance, instanceconfig, node_image,
2538 """Verify an instance.
2540 This function checks to see if the required block devices are
2541 available on the instance's node.
2544 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2545 node_current = instanceconfig.primary_node
2547 node_vol_should = {}
2548 instanceconfig.MapLVsByNode(node_vol_should)
2550 cluster = self.cfg.GetClusterInfo()
2551 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2553 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2554 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2555 code=self.ETYPE_WARNING)
2557 for node in node_vol_should:
2558 n_img = node_image[node]
2559 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2560 # ignore missing volumes on offline or broken nodes
2562 for volume in node_vol_should[node]:
2563 test = volume not in n_img.volumes
2564 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2565 "volume %s missing on node %s", volume, node)
2567 if instanceconfig.admin_state == constants.ADMINST_UP:
2568 pri_img = node_image[node_current]
2569 test = instance not in pri_img.instances and not pri_img.offline
2570 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2571 "instance not running on its primary node %s",
2574 diskdata = [(nname, success, status, idx)
2575 for (nname, disks) in diskstatus.items()
2576 for idx, (success, status) in enumerate(disks)]
2578 for nname, success, bdev_status, idx in diskdata:
2579 # the 'ghost node' construction in Exec() ensures that we have a
2581 snode = node_image[nname]
2582 bad_snode = snode.ghost or snode.offline
2583 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2584 not success and not bad_snode,
2585 constants.CV_EINSTANCEFAULTYDISK, instance,
2586 "couldn't retrieve status for disk/%s on %s: %s",
2587 idx, nname, bdev_status)
2588 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2589 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2590 constants.CV_EINSTANCEFAULTYDISK, instance,
2591 "disk/%s on %s is faulty", idx, nname)
2593 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2594 """Verify if there are any unknown volumes in the cluster.
2596 The .os, .swap and backup volumes are ignored. All other volumes are
2597 reported as unknown.
2599 @type reserved: L{ganeti.utils.FieldSet}
2600 @param reserved: a FieldSet of reserved volume names
2603 for node, n_img in node_image.items():
2604 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2605 self.all_node_info[node].group != self.group_uuid):
2606 # skip non-healthy nodes
2608 for volume in n_img.volumes:
2609 test = ((node not in node_vol_should or
2610 volume not in node_vol_should[node]) and
2611 not reserved.Matches(volume))
2612 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2613 "volume %s is unknown", volume)
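  # Illustrative sketch (not part of Ganeti): the two volume checks above
  # (_VerifyInstance's missing-volume test and the orphan test here) reduced
  # to plain dictionaries. "expected" maps node -> set of LVs the
  # configuration wants on it, "observed" maps node -> set of LVs the node
  # reported; both names are assumptions made for the example.
  #
  #   def _ExampleVolumeDiff(expected, observed):
  #     missing = dict((node, expected[node] - observed.get(node, set()))
  #                    for node in expected)
  #     orphaned = dict((node, observed[node] - expected.get(node, set()))
  #                     for node in observed)
  #     return (missing, orphaned)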
2615 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2616 """Verify N+1 Memory Resilience.
2618 Check that if one single node dies we can still start all the
2619 instances it was primary for.
2622 cluster_info = self.cfg.GetClusterInfo()
2623 for node, n_img in node_image.items():
2624 # This code checks that every node which is now listed as
2625 # secondary has enough memory to host all instances it is
2626 # supposed to, should a single other node in the cluster fail.
2627 # FIXME: not ready for failover to an arbitrary node
2628 # FIXME: does not support file-backed instances
2629 # WARNING: we currently take into account down instances as well
2630 # as up ones, considering that even if they're down someone
2631 # might want to start them even in the event of a node failure.
2632 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2633 # we're skipping nodes marked offline and nodes in other groups from
2634 # the N+1 warning, since most likely we don't have good memory
2635 # information from them; we already list instances living on such
2636 # nodes, and that's enough warning
2638 #TODO(dynmem): also consider ballooning out other instances
2639 for prinode, instances in n_img.sbp.items():
2641 for instance in instances:
2642 bep = cluster_info.FillBE(instance_cfg[instance])
2643 if bep[constants.BE_AUTO_BALANCE]:
2644 needed_mem += bep[constants.BE_MINMEM]
2645 test = n_img.mfree < needed_mem
2646 self._ErrorIf(test, constants.CV_ENODEN1, node,
2647 "not enough memory to accommodate instance failovers"
2648 " should node %s fail (%dMiB needed, %dMiB available)",
2649 prinode, needed_mem, n_img.mfree)
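  # Illustrative sketch (not part of Ganeti): the N+1 computation above on
  # plain data. For every primary node whose instances this node would have
  # to absorb, sum the minimum memory of the auto-balanced instances and
  # compare it with the node's free memory. The argument names are
  # assumptions made for the example.
  #
  #   def _ExampleNPlusOneShortfalls(mfree, sbp, min_mem):
  #     """mfree: MiB free, sbp: primary -> [instances], min_mem: inst -> MiB."""
  #     shortfalls = []
  #     for prinode, instances in sbp.items():
  #       needed = sum(min_mem[inst] for inst in instances)
  #       if mfree < needed:
  #         shortfalls.append((prinode, needed))
  #     return shortfalls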
2652 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2653 (files_all, files_opt, files_mc, files_vm)):
2654 """Verifies file checksums collected from all nodes.
2656 @param errorif: Callback for reporting errors
2657 @param nodeinfo: List of L{objects.Node} objects
2658 @param master_node: Name of master node
2659 @param all_nvinfo: RPC results
2662 # Define functions determining which nodes to consider for a file
2665 (files_mc, lambda node: (node.master_candidate or
2666 node.name == master_node)),
2667 (files_vm, lambda node: node.vm_capable),
2670 # Build mapping from filename to list of nodes which should have the file
2672 for (files, fn) in files2nodefn:
2674 filenodes = nodeinfo
2676 filenodes = filter(fn, nodeinfo)
2677 nodefiles.update((filename,
2678 frozenset(map(operator.attrgetter("name"), filenodes)))
2679 for filename in files)
2681 assert set(nodefiles) == (files_all | files_mc | files_vm)
2683 fileinfo = dict((filename, {}) for filename in nodefiles)
2684 ignore_nodes = set()
2686 for node in nodeinfo:
2688 ignore_nodes.add(node.name)
2691 nresult = all_nvinfo[node.name]
2693 if nresult.fail_msg or not nresult.payload:
2696 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2697 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2698 for (key, value) in fingerprints.items())
2701 test = not (node_files and isinstance(node_files, dict))
2702 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2703 "Node did not return file checksum data")
2705 ignore_nodes.add(node.name)
2708 # Build per-checksum mapping from filename to nodes having it
2709 for (filename, checksum) in node_files.items():
2710 assert filename in nodefiles
2711 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2713 for (filename, checksums) in fileinfo.items():
2714 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2716 # Nodes having the file
2717 with_file = frozenset(node_name
2718 for nodes in fileinfo[filename].values()
2719 for node_name in nodes) - ignore_nodes
2721 expected_nodes = nodefiles[filename] - ignore_nodes
2723 # Nodes missing file
2724 missing_file = expected_nodes - with_file
2726 if filename in files_opt:
2728 errorif(missing_file and missing_file != expected_nodes,
2729 constants.CV_ECLUSTERFILECHECK, None,
2730 "File %s is optional, but it must exist on all or no"
2731 " nodes (not found on %s)",
2732 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2734 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2735 "File %s is missing from node(s) %s", filename,
2736 utils.CommaJoin(utils.NiceSort(missing_file)))
2738 # Warn if a node has a file it shouldn't
2739 unexpected = with_file - expected_nodes
2741 constants.CV_ECLUSTERFILECHECK, None,
2742 "File %s should not exist on node(s) %s",
2743 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2745 # See if there are multiple versions of the file
2746 test = len(checksums) > 1
2748 variants = ["variant %s on %s" %
2749 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2750 for (idx, (checksum, nodes)) in
2751 enumerate(sorted(checksums.items()))]
2755 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2756 "File %s found with %s different checksums (%s)",
2757 filename, len(checksums), "; ".join(variants))
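  # Illustrative sketch (not part of Ganeti): the per-file bookkeeping used
  # above, for a single file. Each checksum maps to the set of nodes that
  # reported it; more than one key means the nodes disagree about the file's
  # content. The sample data is fabricated.
  #
  #   def _ExampleChecksumVariants(reports):
  #     """reports: iterable of (node_name, checksum) pairs for one file."""
  #     by_checksum = {}
  #     for (node, checksum) in reports:
  #       by_checksum.setdefault(checksum, set()).add(node)
  #     return by_checksum
  #
  #   # _ExampleChecksumVariants([("n1", "a"), ("n2", "a"), ("n3", "b")])
  #   # yields two keys, i.e. the file exists in two different variants.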
2759 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2761 """Verifies the node DRBD status.
2763 @type ninfo: L{objects.Node}
2764 @param ninfo: the node to check
2765 @param nresult: the remote results for the node
2766 @param instanceinfo: the dict of instances
2767 @param drbd_helper: the configured DRBD usermode helper
2768 @param drbd_map: the DRBD map as returned by
2769 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2773 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2776 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2777 test = (helper_result is None)
2778 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2779 "no drbd usermode helper returned")
2781 status, payload = helper_result
2783 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2784 "drbd usermode helper check unsuccessful: %s", payload)
2785 test = status and (payload != drbd_helper)
2786 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2787 "wrong drbd usermode helper: %s", payload)
2789 # compute the DRBD minors
2791 for minor, instance in drbd_map[node].items():
2792 test = instance not in instanceinfo
2793 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2794 "ghost instance '%s' in temporary DRBD map", instance)
2795 # ghost instance should not be running, but otherwise we
2796 # don't give double warnings (both ghost instance and
2797 # unallocated minor in use)
2799 node_drbd[minor] = (instance, False)
2801 instance = instanceinfo[instance]
2802 node_drbd[minor] = (instance.name,
2803 instance.admin_state == constants.ADMINST_UP)
2805 # and now check them
2806 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2807 test = not isinstance(used_minors, (tuple, list))
2808 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2809 "cannot parse drbd status file: %s", str(used_minors))
2811 # we cannot check drbd status
2814 for minor, (iname, must_exist) in node_drbd.items():
2815 test = minor not in used_minors and must_exist
2816 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2817 "drbd minor %d of instance %s is not active", minor, iname)
2818 for minor in used_minors:
2819 test = minor not in node_drbd
2820 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2821 "unallocated drbd minor %d is in use", minor)
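  # Illustrative sketch (not part of Ganeti): the two-way DRBD minor
  # comparison above on plain data. Minors the configuration expects but the
  # node does not use belong to inactive devices; minors the node uses
  # without a configuration entry are unallocated. The names are assumptions
  # made for the example.
  #
  #   def _ExampleCompareDrbdMinors(expected, used):
  #     """expected: minor -> (instance, must_exist), used: iterable of minors."""
  #     used = set(used)
  #     inactive = [(minor, inst)
  #                 for (minor, (inst, must_exist)) in expected.items()
  #                 if must_exist and minor not in used]
  #     unallocated = [minor for minor in used if minor not in expected]
  #     return (inactive, unallocated)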
2823 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2824 """Builds the node OS structures.
2826 @type ninfo: L{objects.Node}
2827 @param ninfo: the node to check
2828 @param nresult: the remote results for the node
2829 @param nimg: the node image object
2833 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2835 remote_os = nresult.get(constants.NV_OSLIST, None)
2836 test = (not isinstance(remote_os, list) or
2837 not compat.all(isinstance(v, list) and len(v) == 7
2838 for v in remote_os))
2840 _ErrorIf(test, constants.CV_ENODEOS, node,
2841 "node hasn't returned valid OS data")
2850 for (name, os_path, status, diagnose,
2851 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2853 if name not in os_dict:
2856 # parameters is a list of lists instead of list of tuples due to
2857 # JSON lacking a real tuple type, fix it:
2858 parameters = [tuple(v) for v in parameters]
2859 os_dict[name].append((os_path, status, diagnose,
2860 set(variants), set(parameters), set(api_ver)))
2862 nimg.oslist = os_dict
2864 def _VerifyNodeOS(self, ninfo, nimg, base):
2865 """Verifies the node OS list.
2867 @type ninfo: L{objects.Node}
2868 @param ninfo: the node to check
2869 @param nimg: the node image object
2870 @param base: the 'template' node we match against (e.g. from the master)
2874 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2876 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2878 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2879 for os_name, os_data in nimg.oslist.items():
2880 assert os_data, "Empty OS status for OS %s?!" % os_name
2881 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2882 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2883 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2884 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2885 "OS '%s' has multiple entries (first one shadows the rest): %s",
2886 os_name, utils.CommaJoin([v[0] for v in os_data]))
2887 # comparisons with the 'base' image
2888 test = os_name not in base.oslist
2889 _ErrorIf(test, constants.CV_ENODEOS, node,
2890 "Extra OS %s not present on reference node (%s)",
2894 assert base.oslist[os_name], "Base node has empty OS status?"
2895 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2897 # base OS is invalid, skipping
2899 for kind, a, b in [("API version", f_api, b_api),
2900 ("variants list", f_var, b_var),
2901 ("parameters", beautify_params(f_param),
2902 beautify_params(b_param))]:
2903 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2904 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2905 kind, os_name, base.name,
2906 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2908 # check any missing OSes
2909 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2910 _ErrorIf(missing, constants.CV_ENODEOS, node,
2911 "OSes present on reference node %s but missing on this node: %s",
2912 base.name, utils.CommaJoin(missing))
2914 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2915 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2917 @type ninfo: L{objects.Node}
2918 @param ninfo: the node to check
2919 @param nresult: the remote results for the node
2920 @type is_master: bool
2921 @param is_master: Whether node is the master node
2927 (constants.ENABLE_FILE_STORAGE or
2928 constants.ENABLE_SHARED_FILE_STORAGE)):
2930 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2932 # This should never happen
2933 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2934 "Node did not return forbidden file storage paths")
2936 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2937 "Found forbidden file storage paths: %s",
2938 utils.CommaJoin(fspaths))
2940 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2941 constants.CV_ENODEFILESTORAGEPATHS, node,
2942 "Node should not have returned forbidden file storage"
2945 def _VerifyOob(self, ninfo, nresult):
2946 """Verifies out of band functionality of a node.
2948 @type ninfo: L{objects.Node}
2949 @param ninfo: the node to check
2950 @param nresult: the remote results for the node
2954 # We just have to verify the paths on master and/or master candidates
2955 # as the oob helper is invoked on the master
2956 if ((ninfo.master_candidate or ninfo.master_capable) and
2957 constants.NV_OOB_PATHS in nresult):
2958 for path_result in nresult[constants.NV_OOB_PATHS]:
2959 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2961 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2962 """Verifies and updates the node volume data.
2964 This function will update a L{NodeImage}'s internal structures
2965 with data from the remote call.
2967 @type ninfo: L{objects.Node}
2968 @param ninfo: the node to check
2969 @param nresult: the remote results for the node
2970 @param nimg: the node image object
2971 @param vg_name: the configured VG name
2975 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2977 nimg.lvm_fail = True
2978 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2981 elif isinstance(lvdata, basestring):
2982 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2983 utils.SafeEncode(lvdata))
2984 elif not isinstance(lvdata, dict):
2985 _ErrorIf(True, constants.CV_ENODELVM, node,
2986 "rpc call to node failed (lvlist)")
2988 nimg.volumes = lvdata
2989 nimg.lvm_fail = False
2991 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2992 """Verifies and updates the node instance list.
2994 If the listing was successful, then updates this node's instance
2995 list. Otherwise, it marks the RPC call as failed for the instance
2998 @type ninfo: L{objects.Node}
2999 @param ninfo: the node to check
3000 @param nresult: the remote results for the node
3001 @param nimg: the node image object
3004 idata = nresult.get(constants.NV_INSTANCELIST, None)
3005 test = not isinstance(idata, list)
3006 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3007 "rpc call to node failed (instancelist): %s",
3008 utils.SafeEncode(str(idata)))
3010 nimg.hyp_fail = True
3012 nimg.instances = idata
3014 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3015 """Verifies and computes a node information map
3017 @type ninfo: L{objects.Node}
3018 @param ninfo: the node to check
3019 @param nresult: the remote results for the node
3020 @param nimg: the node image object
3021 @param vg_name: the configured VG name
3025 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3027 # try to read free memory (from the hypervisor)
3028 hv_info = nresult.get(constants.NV_HVINFO, None)
3029 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3030 _ErrorIf(test, constants.CV_ENODEHV, node,
3031 "rpc call to node failed (hvinfo)")
3034 nimg.mfree = int(hv_info["memory_free"])
3035 except (ValueError, TypeError):
3036 _ErrorIf(True, constants.CV_ENODERPC, node,
3037 "node returned invalid nodeinfo, check hypervisor")
3039 # FIXME: devise a free space model for file based instances as well
3040 if vg_name is not None:
3041 test = (constants.NV_VGLIST not in nresult or
3042 vg_name not in nresult[constants.NV_VGLIST])
3043 _ErrorIf(test, constants.CV_ENODELVM, node,
3044 "node didn't return data for the volume group '%s'"
3045 " - it is either missing or broken", vg_name)
3048 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3049 except (ValueError, TypeError):
3050 _ErrorIf(True, constants.CV_ENODERPC, node,
3051 "node returned invalid LVM info, check LVM status")
3053 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3054 """Gets per-disk status information for all instances.
3056 @type nodelist: list of strings
3057 @param nodelist: Node names
3058 @type node_image: dict of (name, L{objects.Node})
3059 @param node_image: Node objects
3060 @type instanceinfo: dict of (name, L{objects.Instance})
3061 @param instanceinfo: Instance objects
3062 @rtype: {instance: {node: [(success, payload)]}}
3063 @return: a dictionary of per-instance dictionaries with nodes as
3064 keys and disk information as values; the disk information is a
3065 list of tuples (success, payload)
3068 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3071 node_disks_devonly = {}
3072 diskless_instances = set()
3073 diskless = constants.DT_DISKLESS
3075 for nname in nodelist:
3076 node_instances = list(itertools.chain(node_image[nname].pinst,
3077 node_image[nname].sinst))
3078 diskless_instances.update(inst for inst in node_instances
3079 if instanceinfo[inst].disk_template == diskless)
3080 disks = [(inst, disk)
3081 for inst in node_instances
3082 for disk in instanceinfo[inst].disks]
3085 # No need to collect data
3088 node_disks[nname] = disks
3090 # _AnnotateDiskParams already makes copies of the disks
3092 for (inst, dev) in disks:
3093 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3094 self.cfg.SetDiskID(anno_disk, nname)
3095 devonly.append(anno_disk)
3097 node_disks_devonly[nname] = devonly
3099 assert len(node_disks) == len(node_disks_devonly)
3101 # Collect data from all nodes with disks
3102 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3105 assert len(result) == len(node_disks)
3109 for (nname, nres) in result.items():
3110 disks = node_disks[nname]
3113 # No data from this node
3114 data = len(disks) * [(False, "node offline")]
3117 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3118 "while getting disk information: %s", msg)
3120 # No data from this node
3121 data = len(disks) * [(False, msg)]
3124 for idx, i in enumerate(nres.payload):
3125 if isinstance(i, (tuple, list)) and len(i) == 2:
3128 logging.warning("Invalid result from node %s, entry %d: %s",
3130 data.append((False, "Invalid result from the remote node"))
3132 for ((inst, _), status) in zip(disks, data):
3133 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3135 # Add empty entries for diskless instances.
3136 for inst in diskless_instances:
3137 assert inst not in instdisk
3140 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3141 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3142 compat.all(isinstance(s, (tuple, list)) and
3143 len(s) == 2 for s in statuses)
3144 for inst, nnames in instdisk.items()
3145 for nname, statuses in nnames.items())
3146 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
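  # Illustrative sketch (not part of Ganeti): the shape of the instdisk
  # mapping built above, shown on fabricated data. Each instance maps to a
  # per-node list of (success, payload) tuples, one entry per disk:
  #
  #   instdisk = {}
  #   for (inst, node, status) in [("inst1", "node1", (True, "ok")),
  #                                ("inst1", "node2", (False, "node offline"))]:
  #     instdisk.setdefault(inst, {}).setdefault(node, []).append(status)
  #   # instdisk == {"inst1": {"node1": [(True, "ok")],
  #   #                        "node2": [(False, "node offline")]}}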
3151 def _SshNodeSelector(group_uuid, all_nodes):
3152 """Create endless iterators for all potential SSH check hosts.
3155 nodes = [node for node in all_nodes
3156 if (node.group != group_uuid and
3158 keyfunc = operator.attrgetter("group")
3160 return map(itertools.cycle,
3161 [sorted(map(operator.attrgetter("name"), names))
3162 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3166 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3167 """Choose which nodes should talk to which other nodes.
3169 We will make nodes contact all nodes in their group, and one node from
3172 @warning: This algorithm has a known issue if one node group is much
3173 smaller than others (e.g. just one node). In such a case all other
3174 nodes will talk to the single node.
3177 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3178 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3180 return (online_nodes,
3181 dict((name, sorted([i.next() for i in sel]))
3182 for name in online_nodes))
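  # Illustrative sketch (not part of Ganeti): the round-robin selection above
  # on plain data. Every foreign group contributes one endless iterator over
  # its sorted node names; each local online node then draws the next name
  # from every iterator, spreading the cross-group SSH checks evenly. The
  # names are assumptions made for the example.
  #
  #   def _ExampleSshTargets(local_nodes, other_groups):
  #     """local_nodes: list of names, other_groups: group -> list of names."""
  #     cycles = [itertools.cycle(sorted(names))
  #               for names in other_groups.values()]
  #     return dict((name, sorted(next(c) for c in cycles))
  #                 for name in local_nodes)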
3184 def BuildHooksEnv(self):
3187 Cluster-Verify hooks are run only in the post phase; when they fail, their
3188 output is logged in the verify output and the verification fails.
3192 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3195 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3196 for node in self.my_node_info.values())
3200 def BuildHooksNodes(self):
3201 """Build hooks nodes.
3204 return ([], self.my_node_names)
3206 def Exec(self, feedback_fn):
3207 """Verify integrity of the node group, performing various tests on nodes.
3210 # This method has too many local variables. pylint: disable=R0914
3211 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3213 if not self.my_node_names:
3215 feedback_fn("* Empty node group, skipping verification")
3219 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3220 verbose = self.op.verbose
3221 self._feedback_fn = feedback_fn
3223 vg_name = self.cfg.GetVGName()
3224 drbd_helper = self.cfg.GetDRBDHelper()
3225 cluster = self.cfg.GetClusterInfo()
3226 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3227 hypervisors = cluster.enabled_hypervisors
3228 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3230 i_non_redundant = [] # Non redundant instances
3231 i_non_a_balanced = [] # Non auto-balanced instances
3232 i_offline = 0 # Count of offline instances
3233 n_offline = 0 # Count of offline nodes
3234 n_drained = 0 # Count of nodes being drained
3235 node_vol_should = {}
3237 # FIXME: verify OS list
3240 filemap = _ComputeAncillaryFiles(cluster, False)
3242 # do local checksums
3243 master_node = self.master_node = self.cfg.GetMasterNode()
3244 master_ip = self.cfg.GetMasterIP()
3246 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3249 if self.cfg.GetUseExternalMipScript():
3250 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3252 node_verify_param = {
3253 constants.NV_FILELIST:
3254 map(vcluster.MakeVirtualPath,
3255 utils.UniqueSequence(filename
3256 for files in filemap
3257 for filename in files)),
3258 constants.NV_NODELIST:
3259 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3260 self.all_node_info.values()),
3261 constants.NV_HYPERVISOR: hypervisors,
3262 constants.NV_HVPARAMS:
3263 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3264 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3265 for node in node_data_list
3266 if not node.offline],
3267 constants.NV_INSTANCELIST: hypervisors,
3268 constants.NV_VERSION: None,
3269 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3270 constants.NV_NODESETUP: None,
3271 constants.NV_TIME: None,
3272 constants.NV_MASTERIP: (master_node, master_ip),
3273 constants.NV_OSLIST: None,
3274 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3275 constants.NV_USERSCRIPTS: user_scripts,
3278 if vg_name is not None:
3279 node_verify_param[constants.NV_VGLIST] = None
3280 node_verify_param[constants.NV_LVLIST] = vg_name
3281 node_verify_param[constants.NV_PVLIST] = [vg_name]
3284 node_verify_param[constants.NV_DRBDLIST] = None
3285 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3287 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3288 # Load file storage paths only from master node
3289 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3292 # FIXME: this needs to be changed per node-group, not cluster-wide
3294 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3295 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3296 bridges.add(default_nicpp[constants.NIC_LINK])
3297 for instance in self.my_inst_info.values():
3298 for nic in instance.nics:
3299 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3300 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3301 bridges.add(full_nic[constants.NIC_LINK])
3304 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3306 # Build our expected cluster state
3307 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3309 vm_capable=node.vm_capable))
3310 for node in node_data_list)
3314 for node in self.all_node_info.values():
3315 path = _SupportsOob(self.cfg, node)
3316 if path and path not in oob_paths:
3317 oob_paths.append(path)
3320 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3322 for instance in self.my_inst_names:
3323 inst_config = self.my_inst_info[instance]
3324 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3327 for nname in inst_config.all_nodes:
3328 if nname not in node_image:
3329 gnode = self.NodeImage(name=nname)
3330 gnode.ghost = (nname not in self.all_node_info)
3331 node_image[nname] = gnode
3333 inst_config.MapLVsByNode(node_vol_should)
3335 pnode = inst_config.primary_node
3336 node_image[pnode].pinst.append(instance)
3338 for snode in inst_config.secondary_nodes:
3339 nimg = node_image[snode]
3340 nimg.sinst.append(instance)
3341 if pnode not in nimg.sbp:
3342 nimg.sbp[pnode] = []
3343 nimg.sbp[pnode].append(instance)
3345 # At this point, we have the in-memory data structures complete,
3346 # except for the runtime information, which we'll gather next
3348 # Due to the way our RPC system works, exact response times cannot be
3349 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3350 # time before and after executing the request, we can at least have a time
3352 nvinfo_starttime = time.time()
3353 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3355 self.cfg.GetClusterName())
3356 nvinfo_endtime = time.time()
3358 if self.extra_lv_nodes and vg_name is not None:
3360 self.rpc.call_node_verify(self.extra_lv_nodes,
3361 {constants.NV_LVLIST: vg_name},
3362 self.cfg.GetClusterName())
3364 extra_lv_nvinfo = {}
3366 all_drbd_map = self.cfg.ComputeDRBDMap()
3368 feedback_fn("* Gathering disk information (%s nodes)" %
3369 len(self.my_node_names))
3370 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3373 feedback_fn("* Verifying configuration file consistency")
3375 # If not all nodes are being checked, we need to make sure the master node
3376 # and a non-checked vm_capable node are in the list.
3377 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3379 vf_nvinfo = all_nvinfo.copy()
3380 vf_node_info = list(self.my_node_info.values())
3381 additional_nodes = []
3382 if master_node not in self.my_node_info:
3383 additional_nodes.append(master_node)
3384 vf_node_info.append(self.all_node_info[master_node])
3385 # Add the first vm_capable node we find which is not included,
3386 # excluding the master node (which we already have)
3387 for node in absent_nodes:
3388 nodeinfo = self.all_node_info[node]
3389 if (nodeinfo.vm_capable and not nodeinfo.offline and
3390 node != master_node):
3391 additional_nodes.append(node)
3392 vf_node_info.append(self.all_node_info[node])
3394 key = constants.NV_FILELIST
3395 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3396 {key: node_verify_param[key]},
3397 self.cfg.GetClusterName()))
3399 vf_nvinfo = all_nvinfo
3400 vf_node_info = self.my_node_info.values()
3402 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3404 feedback_fn("* Verifying node status")
3408 for node_i in node_data_list:
3410 nimg = node_image[node]
3414 feedback_fn("* Skipping offline node %s" % (node,))
3418 if node == master_node:
3420 elif node_i.master_candidate:
3421 ntype = "master candidate"
3422 elif node_i.drained:
3428 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3430 msg = all_nvinfo[node].fail_msg
3431 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3434 nimg.rpc_fail = True
3437 nresult = all_nvinfo[node].payload
3439 nimg.call_ok = self._VerifyNode(node_i, nresult)
3440 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3441 self._VerifyNodeNetwork(node_i, nresult)
3442 self._VerifyNodeUserScripts(node_i, nresult)
3443 self._VerifyOob(node_i, nresult)
3444 self._VerifyFileStoragePaths(node_i, nresult,
3445 node == master_node)
3448 self._VerifyNodeLVM(node_i, nresult, vg_name)
3449 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3452 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3453 self._UpdateNodeInstances(node_i, nresult, nimg)
3454 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3455 self._UpdateNodeOS(node_i, nresult, nimg)
3457 if not nimg.os_fail:
3458 if refos_img is None:
3460 self._VerifyNodeOS(node_i, nimg, refos_img)
3461 self._VerifyNodeBridges(node_i, nresult, bridges)
3463 # Check whether all running instances are primary for the node. (This
3464 # can no longer be done from _VerifyInstance below, since some of the
3465 # wrong instances could be from other node groups.)
3466 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3468 for inst in non_primary_inst:
3469 test = inst in self.all_inst_info
3470 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3471 "instance should not run on node %s", node_i.name)
3472 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3473 "node is running unknown instance %s", inst)
3475 for node, result in extra_lv_nvinfo.items():
3476 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3477 node_image[node], vg_name)
3479 feedback_fn("* Verifying instance status")
3480 for instance in self.my_inst_names:
3482 feedback_fn("* Verifying instance %s" % instance)
3483 inst_config = self.my_inst_info[instance]
3484 self._VerifyInstance(instance, inst_config, node_image,
3486 inst_nodes_offline = []
3488 pnode = inst_config.primary_node
3489 pnode_img = node_image[pnode]
3490 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3491 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3492 " primary node failed", instance)
3494 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3496 constants.CV_EINSTANCEBADNODE, instance,
3497 "instance is marked as running and lives on offline node %s",
3498 inst_config.primary_node)
3500 # If the instance is non-redundant we cannot survive losing its primary
3501 # node, so we are not N+1 compliant.
3502 if inst_config.disk_template not in constants.DTS_MIRRORED:
3503 i_non_redundant.append(instance)
3505 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3506 constants.CV_EINSTANCELAYOUT,
3507 instance, "instance has multiple secondary nodes: %s",
3508 utils.CommaJoin(inst_config.secondary_nodes),
3509 code=self.ETYPE_WARNING)
3511 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3512 pnode = inst_config.primary_node
3513 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3514 instance_groups = {}
3516 for node in instance_nodes:
3517 instance_groups.setdefault(self.all_node_info[node].group,
3521 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3522 # Sort so that we always list the primary node first.
3523 for group, nodes in sorted(instance_groups.items(),
3524 key=lambda (_, nodes): pnode in nodes,
3527 self._ErrorIf(len(instance_groups) > 1,
3528 constants.CV_EINSTANCESPLITGROUPS,
3529 instance, "instance has primary and secondary nodes in"
3530 " different groups: %s", utils.CommaJoin(pretty_list),
3531 code=self.ETYPE_WARNING)
3533 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3534 i_non_a_balanced.append(instance)
3536 for snode in inst_config.secondary_nodes:
3537 s_img = node_image[snode]
3538 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3539 snode, "instance %s, connection to secondary node failed",
3543 inst_nodes_offline.append(snode)
3545 # warn that the instance lives on offline nodes
3546 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3547 "instance has offline secondary node(s) %s",
3548 utils.CommaJoin(inst_nodes_offline))
3549 # ... or ghost/non-vm_capable nodes
3550 for node in inst_config.all_nodes:
3551 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3552 instance, "instance lives on ghost node %s", node)
3553 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3554 instance, "instance lives on non-vm_capable node %s", node)
3556 feedback_fn("* Verifying orphan volumes")
3557 reserved = utils.FieldSet(*cluster.reserved_lvs)
3559 # We will get spurious "unknown volume" warnings if any node of this group
3560 # is secondary for an instance whose primary is in another group. To avoid
3561 # them, we find these instances and add their volumes to node_vol_should.
3562 for inst in self.all_inst_info.values():
3563 for secondary in inst.secondary_nodes:
3564 if (secondary in self.my_node_info
3565 and inst.name not in self.my_inst_info):
3566 inst.MapLVsByNode(node_vol_should)
3569 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3571 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3572 feedback_fn("* Verifying N+1 Memory redundancy")
3573 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3575 feedback_fn("* Other Notes")
3577 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3578 % len(i_non_redundant))
3580 if i_non_a_balanced:
3581 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3582 % len(i_non_a_balanced))
3585 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3588 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3591 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3595 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3596 """Analyze the post-hooks' result
3598 This method analyses the hook result, handles it, and sends some
3599 nicely-formatted feedback back to the user.
3601 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3602 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3603 @param hooks_results: the results of the multi-node hooks rpc call
3604 @param feedback_fn: function used to send feedback back to the caller
3605 @param lu_result: previous Exec result
3606 @return: the new Exec result, based on the previous result
3610 # We only really run POST phase hooks, only for non-empty groups,
3611 # and are only interested in their results
3612 if not self.my_node_names:
3615 elif phase == constants.HOOKS_PHASE_POST:
3616 # Used to change hooks' output to proper indentation
3617 feedback_fn("* Hooks Results")
3618 assert hooks_results, "invalid result from hooks"
3620 for node_name in hooks_results:
3621 res = hooks_results[node_name]
3623 test = msg and not res.offline
3624 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3625 "Communication failure in hooks execution: %s", msg)
3626 if res.offline or msg:
3627 # No need to investigate payload if node is offline or gave
3630 for script, hkr, output in res.payload:
3631 test = hkr == constants.HKR_FAIL
3632 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3633 "Script %s failed, output:", script)
3635 output = self._HOOKS_INDENT_RE.sub(" ", output)
3636 feedback_fn("%s" % output)
3642 class LUClusterVerifyDisks(NoHooksLU):
3643 """Verifies the cluster disks status.
3648 def ExpandNames(self):
3649 self.share_locks = _ShareAll()
3650 self.needed_locks = {
3651 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3654 def Exec(self, feedback_fn):
3655 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3657 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3658 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3659 for group in group_names])
3662 class LUGroupVerifyDisks(NoHooksLU):
3663 """Verifies the status of all disks in a node group.
3668 def ExpandNames(self):
3669 # Raises errors.OpPrereqError on its own if group can't be found
3670 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3672 self.share_locks = _ShareAll()
3673 self.needed_locks = {
3674 locking.LEVEL_INSTANCE: [],
3675 locking.LEVEL_NODEGROUP: [],
3676 locking.LEVEL_NODE: [],
3679 def DeclareLocks(self, level):
3680 if level == locking.LEVEL_INSTANCE:
3681 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3683 # Lock instances optimistically, needs verification once node and group
3684 # locks have been acquired
3685 self.needed_locks[locking.LEVEL_INSTANCE] = \
3686 self.cfg.GetNodeGroupInstances(self.group_uuid)
3688 elif level == locking.LEVEL_NODEGROUP:
3689 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3691 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3692 set([self.group_uuid] +
3693 # Lock all groups used by instances optimistically; this requires
3694 # going via the node before it's locked, requiring verification
3697 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3698 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3700 elif level == locking.LEVEL_NODE:
3701 # This will only lock the nodes in the group to be verified which contain
3703 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3704 self._LockInstancesNodes()
3706 # Lock all nodes in group to be verified
3707 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3708 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3709 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3711 def CheckPrereq(self):
3712 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3713 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3714 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3716 assert self.group_uuid in owned_groups
3718 # Check if locked instances are still correct
3719 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3721 # Get instance information
3722 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3724 # Check if node groups for locked instances are still correct
3725 _CheckInstancesNodeGroups(self.cfg, self.instances,
3726 owned_groups, owned_nodes, self.group_uuid)
3728 def Exec(self, feedback_fn):
3729 """Verify integrity of cluster disks.
3731 @rtype: tuple of three items
3732 @return: a tuple of (dict of node-to-node_error, list of instances
3733 which need activate-disks, dict of instance: (node, volume) for
3738 res_instances = set()
3741 nv_dict = _MapInstanceDisksToNodes(
3742 [inst for inst in self.instances.values()
3743 if inst.admin_state == constants.ADMINST_UP])
3746 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3747 set(self.cfg.GetVmCapableNodeList()))
3749 node_lvs = self.rpc.call_lv_list(nodes, [])
3751 for (node, node_res) in node_lvs.items():
3752 if node_res.offline:
3753 continue
3755 msg = node_res.fail_msg
3756 if msg:
3757 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3758 res_nodes[node] = msg
3759 continue
3761 for lv_name, (_, _, lv_online) in node_res.payload.items():
3762 inst = nv_dict.pop((node, lv_name), None)
3763 if not (lv_online or inst is None):
3764 res_instances.add(inst)
3766 # any leftover items in nv_dict are missing LVs, let's arrange the data
3768 for key, inst in nv_dict.iteritems():
3769 res_missing.setdefault(inst, []).append(list(key))
3771 return (res_nodes, list(res_instances), res_missing)
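# Annotation: a purely illustrative example of the tuple returned above
# (all names are hypothetical):
#
#   ({"node2.example.com": "rpc failure"},       # nodes that failed LV listing
#    ["instance3"],                              # instances needing activate-disks
#    {"instance5": [["node4.example.com", "xenvg/disk0"]]})  # missing LVs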
3774 class LUClusterRepairDiskSizes(NoHooksLU):
3775 """Verifies the cluster disks sizes.
3780 def ExpandNames(self):
3781 if self.op.instances:
3782 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3783 self.needed_locks = {
3784 locking.LEVEL_NODE_RES: [],
3785 locking.LEVEL_INSTANCE: self.wanted_names,
3787 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3789 self.wanted_names = None
3790 self.needed_locks = {
3791 locking.LEVEL_NODE_RES: locking.ALL_SET,
3792 locking.LEVEL_INSTANCE: locking.ALL_SET,
3794 self.share_locks = {
3795 locking.LEVEL_NODE_RES: 1,
3796 locking.LEVEL_INSTANCE: 0,
3799 def DeclareLocks(self, level):
3800 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3801 self._LockInstancesNodes(primary_only=True, level=level)
3803 def CheckPrereq(self):
3804 """Check prerequisites.
3806 This only checks the optional instance list against the existing names.
3809 if self.wanted_names is None:
3810 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3812 self.wanted_instances = \
3813 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
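# Annotation: node resource locks are taken in shared mode (several
# read-mostly LUs may inspect the same nodes concurrently), while the
# instance locks are exclusive because Exec below may rewrite the recorded
# disk sizes of the affected instances via cfg.Update.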
3815 def _EnsureChildSizes(self, disk):
3816 """Ensure children of the disk have the needed disk size.
3818 This is valid mainly for DRBD8 and fixes an issue where the
3819 children have a smaller disk size than the parent.
3821 @param disk: an L{ganeti.objects.Disk} object
3824 if disk.dev_type == constants.LD_DRBD8:
3825 assert disk.children, "Empty children for DRBD8?"
3826 fchild = disk.children[0]
3827 mismatch = fchild.size < disk.size
3829 self.LogInfo("Child disk has size %d, parent %d, fixing",
3830 fchild.size, disk.size)
3831 fchild.size = disk.size
3833 # and we recurse on this child only, not on the metadev
3834 return self._EnsureChildSizes(fchild) or mismatch
3838 def Exec(self, feedback_fn):
3839 """Verify the size of cluster disks.
3842 # TODO: check child disks too
3843 # TODO: check differences in size between primary/secondary nodes
3845 for instance in self.wanted_instances:
3846 pnode = instance.primary_node
3847 if pnode not in per_node_disks:
3848 per_node_disks[pnode] = []
3849 for idx, disk in enumerate(instance.disks):
3850 per_node_disks[pnode].append((instance, idx, disk))
3852 assert not (frozenset(per_node_disks.keys()) -
3853 self.owned_locks(locking.LEVEL_NODE_RES)), \
3854 "Not owning correct locks"
3855 assert not self.owned_locks(locking.LEVEL_NODE)
3858 for node, dskl in per_node_disks.items():
3859 newl = [v[2].Copy() for v in dskl]
3860 for dsk in newl:
3861 self.cfg.SetDiskID(dsk, node)
3862 result = self.rpc.call_blockdev_getsize(node, newl)
3864 self.LogWarning("Failure in blockdev_getsize call to node"
3865 " %s, ignoring", node)
3867 if len(result.payload) != len(dskl):
3868 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3869 " result.payload=%s", node, len(dskl), result.payload)
3870 self.LogWarning("Invalid result from node %s, ignoring node results",
3873 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3875 self.LogWarning("Disk %d of instance %s did not return size"
3876 " information, ignoring", idx, instance.name)
3878 if not isinstance(size, (int, long)):
3879 self.LogWarning("Disk %d of instance %s did not return valid"
3880 " size information, ignoring", idx, instance.name)
3883 if size != disk.size:
3884 self.LogInfo("Disk %d of instance %s has mismatched size,"
3885 " correcting: recorded %d, actual %d", idx,
3886 instance.name, disk.size, size)
3887 disk.size = size
3888 self.cfg.Update(instance, feedback_fn)
3889 changed.append((instance.name, idx, size))
3890 if self._EnsureChildSizes(disk):
3891 self.cfg.Update(instance, feedback_fn)
3892 changed.append((instance.name, idx, disk.size))
3896 class LUClusterRename(LogicalUnit):
3897 """Rename the cluster.
3900 HPATH = "cluster-rename"
3901 HTYPE = constants.HTYPE_CLUSTER
3903 def BuildHooksEnv(self):
3908 "OP_TARGET": self.cfg.GetClusterName(),
3909 "NEW_NAME": self.op.name,
3912 def BuildHooksNodes(self):
3913 """Build hooks nodes.
3916 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3918 def CheckPrereq(self):
3919 """Verify that the passed name is a valid one.
3922 hostname = netutils.GetHostname(name=self.op.name,
3923 family=self.cfg.GetPrimaryIPFamily())
3925 new_name = hostname.name
3926 self.ip = new_ip = hostname.ip
3927 old_name = self.cfg.GetClusterName()
3928 old_ip = self.cfg.GetMasterIP()
3929 if new_name == old_name and new_ip == old_ip:
3930 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3931 " cluster has changed",
3933 if new_ip != old_ip:
3934 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3935 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3936 " reachable on the network" %
3937 new_ip, errors.ECODE_NOTUNIQUE)
3939 self.op.name = new_name
3941 def Exec(self, feedback_fn):
3942 """Rename the cluster.
3945 clustername = self.op.name
3948 # shutdown the master IP
3949 master_params = self.cfg.GetMasterNetworkParameters()
3950 ems = self.cfg.GetUseExternalMipScript()
3951 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3953 result.Raise("Could not disable the master role")
3956 cluster = self.cfg.GetClusterInfo()
3957 cluster.cluster_name = clustername
3958 cluster.master_ip = new_ip
3959 self.cfg.Update(cluster, feedback_fn)
3961 # update the known hosts file
3962 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3963 node_list = self.cfg.GetOnlineNodeList()
3965 node_list.remove(master_params.name)
3968 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3970 master_params.ip = new_ip
3971 result = self.rpc.call_node_activate_master_ip(master_params.name,
3973 msg = result.fail_msg
3975 self.LogWarning("Could not re-enable the master role on"
3976 " the master, please restart manually: %s", msg)
3981 def _ValidateNetmask(cfg, netmask):
3982 """Checks if a netmask is valid.
3984 @type cfg: L{config.ConfigWriter}
3985 @param cfg: The cluster configuration
3987 @param netmask: the netmask to be verified
3988 @raise errors.OpPrereqError: if the validation fails
3991 ip_family = cfg.GetPrimaryIPFamily()
3993 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3994 except errors.ProgrammerError:
3995 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3996 ip_family, errors.ECODE_INVAL)
3997 if not ipcls.ValidateNetmask(netmask):
3998 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3999 (netmask), errors.ECODE_INVAL)
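# Annotation: the master netmask is given as a CIDR prefix length; a sketch
# of how this helper behaves (values purely illustrative) would be:
#
#   _ValidateNetmask(self.cfg, 24)   # accepted on an IPv4 cluster
#   _ValidateNetmask(self.cfg, 129)  # raises OpPrereqError even for IPv6
#
# The actual range check is delegated to the address class returned by
# netutils.IPAddress.GetClassFromIpFamily for the cluster's primary family.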
4002 class LUClusterSetParams(LogicalUnit):
4003 """Change the parameters of the cluster.
4006 HPATH = "cluster-modify"
4007 HTYPE = constants.HTYPE_CLUSTER
4010 def CheckArguments(self):
4014 if self.op.uid_pool:
4015 uidpool.CheckUidPool(self.op.uid_pool)
4017 if self.op.add_uids:
4018 uidpool.CheckUidPool(self.op.add_uids)
4020 if self.op.remove_uids:
4021 uidpool.CheckUidPool(self.op.remove_uids)
4023 if self.op.master_netmask is not None:
4024 _ValidateNetmask(self.cfg, self.op.master_netmask)
4026 if self.op.diskparams:
4027 for dt_params in self.op.diskparams.values():
4028 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4030 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4031 except errors.OpPrereqError, err:
4032 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4035 def ExpandNames(self):
4036 # FIXME: in the future maybe other cluster params won't require checking on
4037 # all nodes to be modified.
4038 self.needed_locks = {
4039 locking.LEVEL_NODE: locking.ALL_SET,
4040 locking.LEVEL_INSTANCE: locking.ALL_SET,
4041 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4043 self.share_locks = {
4044 locking.LEVEL_NODE: 1,
4045 locking.LEVEL_INSTANCE: 1,
4046 locking.LEVEL_NODEGROUP: 1,
4049 def BuildHooksEnv(self):
4054 "OP_TARGET": self.cfg.GetClusterName(),
4055 "NEW_VG_NAME": self.op.vg_name,
4058 def BuildHooksNodes(self):
4059 """Build hooks nodes.
4062 mn = self.cfg.GetMasterNode()
4065 def CheckPrereq(self):
4066 """Check prerequisites.
4068 This checks whether the given params don't conflict and
4069 if the given volume group is valid.
4072 if self.op.vg_name is not None and not self.op.vg_name:
4073 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4074 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4075 " instances exist", errors.ECODE_INVAL)
4077 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4078 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4079 raise errors.OpPrereqError("Cannot disable drbd helper while"
4080 " drbd-based instances exist",
4083 node_list = self.owned_locks(locking.LEVEL_NODE)
4085 # if vg_name not None, checks given volume group on all nodes
4087 vglist = self.rpc.call_vg_list(node_list)
4088 for node in node_list:
4089 msg = vglist[node].fail_msg
4090 if msg:
4091 # ignoring down node
4092 self.LogWarning("Error while gathering data on node %s"
4093 " (ignoring node): %s", node, msg)
4094 continue
4095 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4097 constants.MIN_VG_SIZE)
4099 raise errors.OpPrereqError("Error on node '%s': %s" %
4100 (node, vgstatus), errors.ECODE_ENVIRON)
4102 if self.op.drbd_helper:
4103 # checks given drbd helper on all nodes
4104 helpers = self.rpc.call_drbd_helper(node_list)
4105 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4107 self.LogInfo("Not checking drbd helper on offline node %s", node)
4109 msg = helpers[node].fail_msg
4111 raise errors.OpPrereqError("Error checking drbd helper on node"
4112 " '%s': %s" % (node, msg),
4113 errors.ECODE_ENVIRON)
4114 node_helper = helpers[node].payload
4115 if node_helper != self.op.drbd_helper:
4116 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4117 (node, node_helper), errors.ECODE_ENVIRON)
4119 self.cluster = cluster = self.cfg.GetClusterInfo()
4120 # validate params changes
4121 if self.op.beparams:
4122 objects.UpgradeBeParams(self.op.beparams)
4123 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4124 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4126 if self.op.ndparams:
4127 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4128 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4130 # TODO: we need a more general way to handle resetting
4131 # cluster-level parameters to default values
4132 if self.new_ndparams["oob_program"] == "":
4133 self.new_ndparams["oob_program"] = \
4134 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4136 if self.op.hv_state:
4137 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4138 self.cluster.hv_state_static)
4139 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4140 for hv, values in new_hv_state.items())
4142 if self.op.disk_state:
4143 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4144 self.cluster.disk_state_static)
4145 self.new_disk_state = \
4146 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4147 for name, values in svalues.items()))
4148 for storage, svalues in new_disk_state.items())
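# Annotation: when a new instance policy is submitted, the block below
# recomputes the effective per-group policy and only *warns* about existing
# instances that would violate the new limits; the policy change itself is
# still applied.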
4151 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4154 all_instances = self.cfg.GetAllInstancesInfo().values()
4156 for group in self.cfg.GetAllNodeGroupsInfo().values():
4157 instances = frozenset([inst for inst in all_instances
4158 if compat.any(node in group.members
4159 for node in inst.all_nodes)])
4160 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4161 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4162 new = _ComputeNewInstanceViolations(ipol,
4163 new_ipolicy, instances)
4165 violations.update(new)
4168 self.LogWarning("After the ipolicy change the following instances"
4169 " violate them: %s",
4170 utils.CommaJoin(utils.NiceSort(violations)))
4172 if self.op.nicparams:
4173 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4174 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4175 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4178 # check all instances for consistency
4179 for instance in self.cfg.GetAllInstancesInfo().values():
4180 for nic_idx, nic in enumerate(instance.nics):
4181 params_copy = copy.deepcopy(nic.nicparams)
4182 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4184 # check parameter syntax
4186 objects.NIC.CheckParameterSyntax(params_filled)
4187 except errors.ConfigurationError, err:
4188 nic_errors.append("Instance %s, nic/%d: %s" %
4189 (instance.name, nic_idx, err))
4191 # if we're moving instances to routed, check that they have an ip
4192 target_mode = params_filled[constants.NIC_MODE]
4193 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4194 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4195 " address" % (instance.name, nic_idx))
4197 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4198 "\n".join(nic_errors), errors.ECODE_INVAL)
4200 # hypervisor list/parameters
4201 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4202 if self.op.hvparams:
4203 for hv_name, hv_dict in self.op.hvparams.items():
4204 if hv_name not in self.new_hvparams:
4205 self.new_hvparams[hv_name] = hv_dict
4207 self.new_hvparams[hv_name].update(hv_dict)
4209 # disk template parameters
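# Annotation: like the hypervisor parameters above, submitted disk
# parameters are merged on top of the current cluster-level defaults:
# templates not configured yet are added wholesale, already-known templates
# are updated key by key.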
4210 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4211 if self.op.diskparams:
4212 for dt_name, dt_params in self.op.diskparams.items():
4213 if dt_name not in self.new_diskparams:
4214 self.new_diskparams[dt_name] = dt_params
4216 self.new_diskparams[dt_name].update(dt_params)
4218 # os hypervisor parameters
4219 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4221 for os_name, hvs in self.op.os_hvp.items():
4222 if os_name not in self.new_os_hvp:
4223 self.new_os_hvp[os_name] = hvs
4225 for hv_name, hv_dict in hvs.items():
4226 if hv_name not in self.new_os_hvp[os_name]:
4227 self.new_os_hvp[os_name][hv_name] = hv_dict
4229 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4232 self.new_osp = objects.FillDict(cluster.osparams, {})
4233 if self.op.osparams:
4234 for os_name, osp in self.op.osparams.items():
4235 if os_name not in self.new_osp:
4236 self.new_osp[os_name] = {}
4238 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4241 if not self.new_osp[os_name]:
4242 # we removed all parameters
4243 del self.new_osp[os_name]
4245 # check the parameter validity (remote check)
4246 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4247 os_name, self.new_osp[os_name])
4249 # changes to the hypervisor list
4250 if self.op.enabled_hypervisors is not None:
4251 self.hv_list = self.op.enabled_hypervisors
4252 for hv in self.hv_list:
4253 # if the hypervisor doesn't already exist in the cluster
4254 # hvparams, we initialize it to empty, and then (in both
4255 # cases) we make sure to fill the defaults, as we might not
4256 # have a complete defaults list if the hypervisor wasn't
4258 if hv not in new_hvp:
4260 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4261 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4263 self.hv_list = cluster.enabled_hypervisors
4265 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4266 # either the enabled list has changed, or the parameters have, validate
4267 for hv_name, hv_params in self.new_hvparams.items():
4268 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4269 (self.op.enabled_hypervisors and
4270 hv_name in self.op.enabled_hypervisors)):
4271 # either this is a new hypervisor, or its parameters have changed
4272 hv_class = hypervisor.GetHypervisor(hv_name)
4273 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4274 hv_class.CheckParameterSyntax(hv_params)
4275 _CheckHVParams(self, node_list, hv_name, hv_params)
4278 # no need to check any newly-enabled hypervisors, since the
4279 # defaults have already been checked in the above code-block
4280 for os_name, os_hvp in self.new_os_hvp.items():
4281 for hv_name, hv_params in os_hvp.items():
4282 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4283 # we need to fill in the new os_hvp on top of the actual hv_p
4284 cluster_defaults = self.new_hvparams.get(hv_name, {})
4285 new_osp = objects.FillDict(cluster_defaults, hv_params)
4286 hv_class = hypervisor.GetHypervisor(hv_name)
4287 hv_class.CheckParameterSyntax(new_osp)
4288 _CheckHVParams(self, node_list, hv_name, new_osp)
4290 if self.op.default_iallocator:
4291 alloc_script = utils.FindFile(self.op.default_iallocator,
4292 constants.IALLOCATOR_SEARCH_PATH,
4294 if alloc_script is None:
4295 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4296 " specified" % self.op.default_iallocator,
4299 def Exec(self, feedback_fn):
4300 """Change the parameters of the cluster.
4303 if self.op.vg_name is not None:
4304 new_volume = self.op.vg_name
4307 if new_volume != self.cfg.GetVGName():
4308 self.cfg.SetVGName(new_volume)
4310 feedback_fn("Cluster LVM configuration already in desired"
4311 " state, not changing")
4312 if self.op.drbd_helper is not None:
4313 new_helper = self.op.drbd_helper
4316 if new_helper != self.cfg.GetDRBDHelper():
4317 self.cfg.SetDRBDHelper(new_helper)
4319 feedback_fn("Cluster DRBD helper already in desired state,"
4321 if self.op.hvparams:
4322 self.cluster.hvparams = self.new_hvparams
4324 self.cluster.os_hvp = self.new_os_hvp
4325 if self.op.enabled_hypervisors is not None:
4326 self.cluster.hvparams = self.new_hvparams
4327 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4328 if self.op.beparams:
4329 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4330 if self.op.nicparams:
4331 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4333 self.cluster.ipolicy = self.new_ipolicy
4334 if self.op.osparams:
4335 self.cluster.osparams = self.new_osp
4336 if self.op.ndparams:
4337 self.cluster.ndparams = self.new_ndparams
4338 if self.op.diskparams:
4339 self.cluster.diskparams = self.new_diskparams
4340 if self.op.hv_state:
4341 self.cluster.hv_state_static = self.new_hv_state
4342 if self.op.disk_state:
4343 self.cluster.disk_state_static = self.new_disk_state
4345 if self.op.candidate_pool_size is not None:
4346 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4347 # we need to update the pool size here, otherwise the save will fail
4348 _AdjustCandidatePool(self, [])
4350 if self.op.maintain_node_health is not None:
4351 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4352 feedback_fn("Note: CONFD was disabled at build time, node health"
4353 " maintenance is not useful (still enabling it)")
4354 self.cluster.maintain_node_health = self.op.maintain_node_health
4356 if self.op.prealloc_wipe_disks is not None:
4357 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4359 if self.op.add_uids is not None:
4360 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4362 if self.op.remove_uids is not None:
4363 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4365 if self.op.uid_pool is not None:
4366 self.cluster.uid_pool = self.op.uid_pool
4368 if self.op.default_iallocator is not None:
4369 self.cluster.default_iallocator = self.op.default_iallocator
4371 if self.op.reserved_lvs is not None:
4372 self.cluster.reserved_lvs = self.op.reserved_lvs
4374 if self.op.use_external_mip_script is not None:
4375 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4377 def helper_os(aname, mods, desc):
4379 lst = getattr(self.cluster, aname)
4380 for key, val in mods:
4381 if key == constants.DDM_ADD:
4383 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4386 elif key == constants.DDM_REMOVE:
4390 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4392 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4394 if self.op.hidden_os:
4395 helper_os("hidden_os", self.op.hidden_os, "hidden")
4397 if self.op.blacklisted_os:
4398 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4400 if self.op.master_netdev:
4401 master_params = self.cfg.GetMasterNetworkParameters()
4402 ems = self.cfg.GetUseExternalMipScript()
4403 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4404 self.cluster.master_netdev)
4405 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4407 result.Raise("Could not disable the master ip")
4408 feedback_fn("Changing master_netdev from %s to %s" %
4409 (master_params.netdev, self.op.master_netdev))
4410 self.cluster.master_netdev = self.op.master_netdev
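# Annotation: the master IP has just been taken down on the old netdev; it
# is only re-activated further below, after the updated configuration
# (including the new netdev) has been written with cfg.Update.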
4412 if self.op.master_netmask:
4413 master_params = self.cfg.GetMasterNetworkParameters()
4414 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4415 result = self.rpc.call_node_change_master_netmask(master_params.name,
4416 master_params.netmask,
4417 self.op.master_netmask,
4419 master_params.netdev)
4421 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4424 self.cluster.master_netmask = self.op.master_netmask
4426 self.cfg.Update(self.cluster, feedback_fn)
4428 if self.op.master_netdev:
4429 master_params = self.cfg.GetMasterNetworkParameters()
4430 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4431 self.op.master_netdev)
4432 ems = self.cfg.GetUseExternalMipScript()
4433 result = self.rpc.call_node_activate_master_ip(master_params.name,
4436 self.LogWarning("Could not re-enable the master ip on"
4437 " the master, please restart manually: %s",
4441 def _UploadHelper(lu, nodes, fname):
4442 """Helper for uploading a file and showing warnings.
4445 if os.path.exists(fname):
4446 result = lu.rpc.call_upload_file(nodes, fname)
4447 for to_node, to_result in result.items():
4448 msg = to_result.fail_msg
4450 msg = ("Copy of file %s to node %s failed: %s" %
4451 (fname, to_node, msg))
4452 lu.proc.LogWarning(msg)
4455 def _ComputeAncillaryFiles(cluster, redist):
4456 """Compute files external to Ganeti which need to be consistent.
4458 @type redist: boolean
4459 @param redist: Whether to include files which need to be redistributed
4462 # Compute files for all nodes
4464 pathutils.SSH_KNOWN_HOSTS_FILE,
4465 pathutils.CONFD_HMAC_KEY,
4466 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4467 pathutils.SPICE_CERT_FILE,
4468 pathutils.SPICE_CACERT_FILE,
4469 pathutils.RAPI_USERS_FILE,
4473 # we need to ship at least the RAPI certificate
4474 files_all.add(pathutils.RAPI_CERT_FILE)
4476 files_all.update(pathutils.ALL_CERT_FILES)
4477 files_all.update(ssconf.SimpleStore().GetFileList())
4479 if cluster.modify_etc_hosts:
4480 files_all.add(pathutils.ETC_HOSTS)
4482 if cluster.use_external_mip_script:
4483 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4485 # Files which are optional, these must:
4486 # - be present in one other category as well
4487 # - either exist or not exist on all nodes of that category (mc, vm all)
4489 pathutils.RAPI_USERS_FILE,
4492 # Files which should only be on master candidates
4496 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4500 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4501 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4502 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4504 # Files which should only be on VM-capable nodes
4507 for hv_name in cluster.enabled_hypervisors
4508 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4512 for hv_name in cluster.enabled_hypervisors
4513 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4515 # Filenames in each category must be unique
4516 all_files_set = files_all | files_mc | files_vm
4517 assert (len(all_files_set) ==
4518 sum(map(len, [files_all, files_mc, files_vm]))), \
4519 "Found file listed in more than one file list"
4521 # Optional files must be present in one other category
4522 assert all_files_set.issuperset(files_opt), \
4523 "Optional file not in a different required list"
4525 # This one file should never ever be re-distributed via RPC
4526 assert not (redist and
4527 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4529 return (files_all, files_opt, files_mc, files_vm)
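# Annotation: an illustrative (non-exhaustive) shape of the returned tuple:
#   files_all - files for every node, e.g. known_hosts, the confd HMAC key,
#               SPICE certificates and the ssconf files
#   files_opt - files that may legitimately be absent, e.g. the RAPI users file
#   files_mc  - master-candidate-only files (the cluster config, unless
#               redistributing)
#   files_vm  - hypervisor ancillary files for VM-capable nodes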
4532 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4533 """Distribute additional files which are part of the cluster configuration.
4535 ConfigWriter takes care of distributing the config and ssconf files, but
4536 there are more files which should be distributed to all nodes. This function
4537 makes sure those are copied.
4539 @param lu: calling logical unit
4540 @param additional_nodes: list of nodes not in the config to distribute to
4541 @type additional_vm: boolean
4542 @param additional_vm: whether the additional nodes are vm-capable or not
4545 # Gather target nodes
4546 cluster = lu.cfg.GetClusterInfo()
4547 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4549 online_nodes = lu.cfg.GetOnlineNodeList()
4550 online_set = frozenset(online_nodes)
4551 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4553 if additional_nodes is not None:
4554 online_nodes.extend(additional_nodes)
4556 vm_nodes.extend(additional_nodes)
4558 # Never distribute to master node
4559 for nodelist in [online_nodes, vm_nodes]:
4560 if master_info.name in nodelist:
4561 nodelist.remove(master_info.name)
4564 (files_all, _, files_mc, files_vm) = \
4565 _ComputeAncillaryFiles(cluster, True)
4567 # Never re-distribute configuration file from here
4568 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4569 pathutils.CLUSTER_CONF_FILE in files_vm)
4570 assert not files_mc, "Master candidates not handled in this function"
4572 filemap = [
4573 (online_nodes, files_all),
4574 (vm_nodes, files_vm),
4578 for (node_list, files) in filemap:
4579 for fname in files:
4580 _UploadHelper(lu, node_list, fname)
4583 class LUClusterRedistConf(NoHooksLU):
4584 """Force the redistribution of cluster configuration.
4586 This is a very simple LU.
4591 def ExpandNames(self):
4592 self.needed_locks = {
4593 locking.LEVEL_NODE: locking.ALL_SET,
4595 self.share_locks[locking.LEVEL_NODE] = 1
4597 def Exec(self, feedback_fn):
4598 """Redistribute the configuration.
4601 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4602 _RedistributeAncillaryFiles(self)
4605 class LUClusterActivateMasterIp(NoHooksLU):
4606 """Activate the master IP on the master node.
4609 def Exec(self, feedback_fn):
4610 """Activate the master IP.
4613 master_params = self.cfg.GetMasterNetworkParameters()
4614 ems = self.cfg.GetUseExternalMipScript()
4615 result = self.rpc.call_node_activate_master_ip(master_params.name,
4617 result.Raise("Could not activate the master IP")
4620 class LUClusterDeactivateMasterIp(NoHooksLU):
4621 """Deactivate the master IP on the master node.
4624 def Exec(self, feedback_fn):
4625 """Deactivate the master IP.
4628 master_params = self.cfg.GetMasterNetworkParameters()
4629 ems = self.cfg.GetUseExternalMipScript()
4630 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4632 result.Raise("Could not deactivate the master IP")
4635 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4636 """Sleep and poll for an instance's disk to sync.
4639 if not instance.disks or disks is not None and not disks:
4642 disks = _ExpandCheckDisks(instance, disks)
4645 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4647 node = instance.primary_node
4650 lu.cfg.SetDiskID(dev, node)
4652 # TODO: Convert to utils.Retry
4655 degr_retries = 10 # in seconds, as we sleep 1 second each time
4659 cumul_degraded = False
4660 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4661 msg = rstats.fail_msg
4663 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4666 raise errors.RemoteError("Can't contact node %s for mirror data,"
4667 " aborting." % node)
4670 rstats = rstats.payload
4672 for i, mstat in enumerate(rstats):
4674 lu.LogWarning("Can't compute data for node %s/%s",
4675 node, disks[i].iv_name)
4678 cumul_degraded = (cumul_degraded or
4679 (mstat.is_degraded and mstat.sync_percent is None))
4680 if mstat.sync_percent is not None:
4682 if mstat.estimated_time is not None:
4683 rem_time = ("%s remaining (estimated)" %
4684 utils.FormatSeconds(mstat.estimated_time))
4685 max_time = mstat.estimated_time
4687 rem_time = "no time estimate"
4688 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4689 (disks[i].iv_name, mstat.sync_percent, rem_time))
4691 # if we're done but degraded, let's do a few small retries, to
4692 # make sure we see a stable and not transient situation; therefore
4693 # we force restart of the loop
4694 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4695 logging.info("Degraded disks found, %d retries left", degr_retries)
4703 time.sleep(min(60, max_time))
4706 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4707 return not cumul_degraded
4710 def _BlockdevFind(lu, node, dev, instance):
4711 """Wrapper around call_blockdev_find to annotate diskparams.
4713 @param lu: A reference to the lu object
4714 @param node: The node to call out to
4715 @param dev: The device to find
4716 @param instance: The instance object the device belongs to
4717 @returns The result of the rpc call
4720 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4721 return lu.rpc.call_blockdev_find(node, disk)
4724 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4725 """Wrapper around L{_CheckDiskConsistencyInner}.
4728 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4729 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4733 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4735 """Check that mirrors are not degraded.
4737 @attention: The device has to be annotated already.
4739 The ldisk parameter, if True, will change the test from the
4740 is_degraded attribute (which represents overall non-ok status for
4741 the device(s)) to the ldisk (representing the local storage status).
4744 lu.cfg.SetDiskID(dev, node)
4748 if on_primary or dev.AssembleOnSecondary():
4749 rstats = lu.rpc.call_blockdev_find(node, dev)
4750 msg = rstats.fail_msg
4752 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4754 elif not rstats.payload:
4755 lu.LogWarning("Can't find disk on node %s", node)
4759 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4760 else:
4761 result = result and not rstats.payload.is_degraded
4764 for child in dev.children:
4765 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4771 class LUOobCommand(NoHooksLU):
4772 """Logical unit for OOB handling.
4776 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4778 def ExpandNames(self):
4779 """Gather locks we need.
4782 if self.op.node_names:
4783 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4784 lock_names = self.op.node_names
4786 lock_names = locking.ALL_SET
4788 self.needed_locks = {
4789 locking.LEVEL_NODE: lock_names,
4792 def CheckPrereq(self):
4793 """Check prerequisites.
4796 - the node exists in the configuration
4799 Any errors are signaled by raising errors.OpPrereqError.
4803 self.master_node = self.cfg.GetMasterNode()
4805 assert self.op.power_delay >= 0.0
4807 if self.op.node_names:
4808 if (self.op.command in self._SKIP_MASTER and
4809 self.master_node in self.op.node_names):
4810 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4811 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4813 if master_oob_handler:
4814 additional_text = ("run '%s %s %s' if you want to operate on the"
4815 " master regardless") % (master_oob_handler,
4819 additional_text = "it does not support out-of-band operations"
4821 raise errors.OpPrereqError(("Operating on the master node %s is not"
4822 " allowed for %s; %s") %
4823 (self.master_node, self.op.command,
4824 additional_text), errors.ECODE_INVAL)
4826 self.op.node_names = self.cfg.GetNodeList()
4827 if self.op.command in self._SKIP_MASTER:
4828 self.op.node_names.remove(self.master_node)
4830 if self.op.command in self._SKIP_MASTER:
4831 assert self.master_node not in self.op.node_names
4833 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4835 raise errors.OpPrereqError("Node %s not found" % node_name,
4838 self.nodes.append(node)
4840 if (not self.op.ignore_status and
4841 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4842 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4843 " not marked offline") % node_name,
4846 def Exec(self, feedback_fn):
4847 """Execute OOB and return result if we expect any.
4850 master_node = self.master_node
4853 for idx, node in enumerate(utils.NiceSort(self.nodes,
4854 key=lambda node: node.name)):
4855 node_entry = [(constants.RS_NORMAL, node.name)]
4856 ret.append(node_entry)
4858 oob_program = _SupportsOob(self.cfg, node)
4861 node_entry.append((constants.RS_UNAVAIL, None))
4864 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4865 self.op.command, oob_program, node.name)
4866 result = self.rpc.call_run_oob(master_node, oob_program,
4867 self.op.command, node.name,
4871 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4872 node.name, result.fail_msg)
4873 node_entry.append((constants.RS_NODATA, None))
4876 self._CheckPayload(result)
4877 except errors.OpExecError, err:
4878 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4880 node_entry.append((constants.RS_NODATA, None))
4882 if self.op.command == constants.OOB_HEALTH:
4883 # For health we should log important events
4884 for item, status in result.payload:
4885 if status in [constants.OOB_STATUS_WARNING,
4886 constants.OOB_STATUS_CRITICAL]:
4887 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4888 item, node.name, status)
4890 if self.op.command == constants.OOB_POWER_ON:
4892 elif self.op.command == constants.OOB_POWER_OFF:
4893 node.powered = False
4894 elif self.op.command == constants.OOB_POWER_STATUS:
4895 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4896 if powered != node.powered:
4897 logging.warning(("Recorded power state (%s) of node '%s' does not"
4898 " match actual power state (%s)"), node.powered,
4901 # For configuration changing commands we should update the node
4902 if self.op.command in (constants.OOB_POWER_ON,
4903 constants.OOB_POWER_OFF):
4904 self.cfg.Update(node, feedback_fn)
4906 node_entry.append((constants.RS_NORMAL, result.payload))
4908 if (self.op.command == constants.OOB_POWER_ON and
4909 idx < len(self.nodes) - 1):
4910 time.sleep(self.op.power_delay)
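# Annotation: power-on commands are staggered by op.power_delay seconds
# between consecutive nodes (no trailing delay after the last one),
# presumably to avoid powering up the whole cluster at the same instant.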
4914 def _CheckPayload(self, result):
4915 """Checks if the payload is valid.
4917 @param result: RPC result
4918 @raises errors.OpExecError: If payload is not valid
4922 if self.op.command == constants.OOB_HEALTH:
4923 if not isinstance(result.payload, list):
4924 errs.append("command 'health' is expected to return a list but got %s" %
4925 type(result.payload))
4927 for item, status in result.payload:
4928 if status not in constants.OOB_STATUSES:
4929 errs.append("health item '%s' has invalid status '%s'" %
4932 if self.op.command == constants.OOB_POWER_STATUS:
4933 if not isinstance(result.payload, dict):
4934 errs.append("power-status is expected to return a dict but got %s" %
4935 type(result.payload))
4937 if self.op.command in [
4938 constants.OOB_POWER_ON,
4939 constants.OOB_POWER_OFF,
4940 constants.OOB_POWER_CYCLE,
4942 if result.payload is not None:
4943 errs.append("%s is expected to not return payload but got '%s'" %
4944 (self.op.command, result.payload))
4947 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4948 utils.CommaJoin(errs))
4951 class _OsQuery(_QueryBase):
4952 FIELDS = query.OS_FIELDS
4954 def ExpandNames(self, lu):
4955 # Lock all nodes in shared mode
4956 # Temporary removal of locks, should be reverted later
4957 # TODO: reintroduce locks when they are lighter-weight
4958 lu.needed_locks = {}
4959 #self.share_locks[locking.LEVEL_NODE] = 1
4960 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4962 # The following variables interact with _QueryBase._GetNames
4964 self.wanted = self.names
4966 self.wanted = locking.ALL_SET
4968 self.do_locking = self.use_locking
4970 def DeclareLocks(self, lu, level):
4974 def _DiagnoseByOS(rlist):
4975 """Remaps a per-node return list into an a per-os per-node dictionary
4977 @param rlist: a map with node names as keys and OS objects as values
4980 @return: a dictionary with osnames as keys and as value another
4981 map, with nodes as keys and tuples of (path, status, diagnose,
4982 variants, parameters, api_versions) as values, eg::
4984 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4985 (/srv/..., False, "invalid api")],
4986 "node2": [(/srv/..., True, "", [], [])]}
4991 # we build here the list of nodes that didn't fail the RPC (at RPC
4992 # level), so that nodes with a non-responding node daemon don't
4993 # make all OSes invalid
4994 good_nodes = [node_name for node_name in rlist
4995 if not rlist[node_name].fail_msg]
4996 for node_name, nr in rlist.items():
4997 if nr.fail_msg or not nr.payload:
4999 for (name, path, status, diagnose, variants,
5000 params, api_versions) in nr.payload:
5001 if name not in all_os:
5002 # build a list of nodes for this os containing empty lists
5003 # for each node in node_list
5005 for nname in good_nodes:
5006 all_os[name][nname] = []
5007 # convert params from [name, help] to (name, help)
5008 params = [tuple(v) for v in params]
5009 all_os[name][node_name].append((path, status, diagnose,
5010 variants, params, api_versions))
5011 return all_os
5013 def _GetQueryData(self, lu):
5014 """Computes the list of nodes and their attributes.
5017 # Locking is not used
5018 assert not (compat.any(lu.glm.is_owned(level)
5019 for level in locking.LEVELS
5020 if level != locking.LEVEL_CLUSTER) or
5021 self.do_locking or self.use_locking)
5023 valid_nodes = [node.name
5024 for node in lu.cfg.GetAllNodesInfo().values()
5025 if not node.offline and node.vm_capable]
5026 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5027 cluster = lu.cfg.GetClusterInfo()
5031 for (os_name, os_data) in pol.items():
5032 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5033 hidden=(os_name in cluster.hidden_os),
5034 blacklisted=(os_name in cluster.blacklisted_os))
5038 api_versions = set()
5040 for idx, osl in enumerate(os_data.values()):
5041 info.valid = bool(info.valid and osl and osl[0][1])
5045 (node_variants, node_params, node_api) = osl[0][3:6]
5048 variants.update(node_variants)
5049 parameters.update(node_params)
5050 api_versions.update(node_api)
5052 # Filter out inconsistent values
5053 variants.intersection_update(node_variants)
5054 parameters.intersection_update(node_params)
5055 api_versions.intersection_update(node_api)
5057 info.variants = list(variants)
5058 info.parameters = list(parameters)
5059 info.api_versions = list(api_versions)
5061 data[os_name] = info
5063 # Prepare data in requested order
5064 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5068 class LUOsDiagnose(NoHooksLU):
5069 """Logical unit for OS diagnose/query.
5075 def _BuildFilter(fields, names):
5076 """Builds a filter for querying OSes.
5079 name_filter = qlang.MakeSimpleFilter("name", names)
5081 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5082 # respective field is not requested
5083 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5084 for fname in ["hidden", "blacklisted"]
5085 if fname not in fields]
5086 if "valid" not in fields:
5087 status_filter.append([qlang.OP_TRUE, "valid"])
5090 status_filter.insert(0, qlang.OP_AND)
5092 status_filter = None
5094 if name_filter and status_filter:
5095 return [qlang.OP_AND, name_filter, status_filter]
5099 return status_filter
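# Annotation: a sketch of the filter built for a plain OS listing (no names
# requested and none of "hidden", "blacklisted", "valid" among the fields)
# would be roughly:
#
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
#
# i.e. hidden, blacklisted and invalid OSes are filtered out unless the
# corresponding field was explicitly requested.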
5101 def CheckArguments(self):
5102 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5103 self.op.output_fields, False)
5105 def ExpandNames(self):
5106 self.oq.ExpandNames(self)
5108 def Exec(self, feedback_fn):
5109 return self.oq.OldStyleQuery(self)
5112 class LUNodeRemove(LogicalUnit):
5113 """Logical unit for removing a node.
5116 HPATH = "node-remove"
5117 HTYPE = constants.HTYPE_NODE
5119 def BuildHooksEnv(self):
5124 "OP_TARGET": self.op.node_name,
5125 "NODE_NAME": self.op.node_name,
5128 def BuildHooksNodes(self):
5129 """Build hooks nodes.
5131 This doesn't run on the target node in the pre phase as a failed
5132 node would then be impossible to remove.
5135 all_nodes = self.cfg.GetNodeList()
5137 all_nodes.remove(self.op.node_name)
5140 return (all_nodes, all_nodes)
5142 def CheckPrereq(self):
5143 """Check prerequisites.
5146 - the node exists in the configuration
5147 - it does not have primary or secondary instances
5148 - it's not the master
5150 Any errors are signaled by raising errors.OpPrereqError.
5153 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5154 node = self.cfg.GetNodeInfo(self.op.node_name)
5155 assert node is not None
5157 masternode = self.cfg.GetMasterNode()
5158 if node.name == masternode:
5159 raise errors.OpPrereqError("Node is the master node, failover to another"
5160 " node is required", errors.ECODE_INVAL)
5162 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5163 if node.name in instance.all_nodes:
5164 raise errors.OpPrereqError("Instance %s is still running on the node,"
5165 " please remove first" % instance_name,
5167 self.op.node_name = node.name
5170 def Exec(self, feedback_fn):
5171 """Removes the node from the cluster.
5175 logging.info("Stopping the node daemon and removing configs from node %s",
5178 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5180 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5183 # Promote nodes to master candidate as needed
5184 _AdjustCandidatePool(self, exceptions=[node.name])
5185 self.context.RemoveNode(node.name)
5187 # Run post hooks on the node before it's removed
5188 _RunPostHook(self, node.name)
5190 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5191 msg = result.fail_msg
5193 self.LogWarning("Errors encountered on the remote node while leaving"
5194 " the cluster: %s", msg)
5196 # Remove node from our /etc/hosts
5197 if self.cfg.GetClusterInfo().modify_etc_hosts:
5198 master_node = self.cfg.GetMasterNode()
5199 result = self.rpc.call_etc_hosts_modify(master_node,
5200 constants.ETC_HOSTS_REMOVE,
5202 result.Raise("Can't update hosts file with new host data")
5203 _RedistributeAncillaryFiles(self)
5206 class _NodeQuery(_QueryBase):
5207 FIELDS = query.NODE_FIELDS
5209 def ExpandNames(self, lu):
5210 lu.needed_locks = {}
5211 lu.share_locks = _ShareAll()
5214 self.wanted = _GetWantedNodes(lu, self.names)
5216 self.wanted = locking.ALL_SET
5218 self.do_locking = (self.use_locking and
5219 query.NQ_LIVE in self.requested_data)
5222 # If any non-static field is requested we need to lock the nodes
5223 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5225 def DeclareLocks(self, lu, level):
5228 def _GetQueryData(self, lu):
5229 """Computes the list of nodes and their attributes.
5232 all_info = lu.cfg.GetAllNodesInfo()
5234 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5236 # Gather data as requested
5237 if query.NQ_LIVE in self.requested_data:
5238 # filter out non-vm_capable nodes
5239 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5241 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5242 [lu.cfg.GetHypervisorType()])
5243 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5244 for (name, nresult) in node_data.items()
5245 if not nresult.fail_msg and nresult.payload)
5249 if query.NQ_INST in self.requested_data:
5250 node_to_primary = dict([(name, set()) for name in nodenames])
5251 node_to_secondary = dict([(name, set()) for name in nodenames])
5253 inst_data = lu.cfg.GetAllInstancesInfo()
5255 for inst in inst_data.values():
5256 if inst.primary_node in node_to_primary:
5257 node_to_primary[inst.primary_node].add(inst.name)
5258 for secnode in inst.secondary_nodes:
5259 if secnode in node_to_secondary:
5260 node_to_secondary[secnode].add(inst.name)
5262 node_to_primary = None
5263 node_to_secondary = None
5265 if query.NQ_OOB in self.requested_data:
5266 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5267 for name, node in all_info.iteritems())
5271 if query.NQ_GROUP in self.requested_data:
5272 groups = lu.cfg.GetAllNodeGroupsInfo()
5276 return query.NodeQueryData([all_info[name] for name in nodenames],
5277 live_data, lu.cfg.GetMasterNode(),
5278 node_to_primary, node_to_secondary, groups,
5279 oob_support, lu.cfg.GetClusterInfo())
5282 class LUNodeQuery(NoHooksLU):
5283 """Logical unit for querying nodes.
5286 # pylint: disable=W0142
5289 def CheckArguments(self):
5290 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5291 self.op.output_fields, self.op.use_locking)
5293 def ExpandNames(self):
5294 self.nq.ExpandNames(self)
5296 def DeclareLocks(self, level):
5297 self.nq.DeclareLocks(self, level)
5299 def Exec(self, feedback_fn):
5300 return self.nq.OldStyleQuery(self)
5303 class LUNodeQueryvols(NoHooksLU):
5304 """Logical unit for getting volumes on node(s).
5308 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5309 _FIELDS_STATIC = utils.FieldSet("node")
5311 def CheckArguments(self):
5312 _CheckOutputFields(static=self._FIELDS_STATIC,
5313 dynamic=self._FIELDS_DYNAMIC,
5314 selected=self.op.output_fields)
5316 def ExpandNames(self):
5317 self.share_locks = _ShareAll()
5318 self.needed_locks = {}
5320 if not self.op.nodes:
5321 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5323 self.needed_locks[locking.LEVEL_NODE] = \
5324 _GetWantedNodes(self, self.op.nodes)
5326 def Exec(self, feedback_fn):
5327 """Computes the list of nodes and their attributes.
5330 nodenames = self.owned_locks(locking.LEVEL_NODE)
5331 volumes = self.rpc.call_node_volumes(nodenames)
5333 ilist = self.cfg.GetAllInstancesInfo()
5334 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5337 for node in nodenames:
5338 nresult = volumes[node]
5341 msg = nresult.fail_msg
5343 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5346 node_vols = sorted(nresult.payload,
5347 key=operator.itemgetter("dev"))
5349 for vol in node_vols:
5351 for field in self.op.output_fields:
5354 elif field == "phys":
5358 elif field == "name":
5360 elif field == "size":
5361 val = int(float(vol["size"]))
5362 elif field == "instance":
5363 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5365 raise errors.ParameterError(field)
5366 node_output.append(str(val))
5368 output.append(node_output)
5373 class LUNodeQueryStorage(NoHooksLU):
5374 """Logical unit for getting information on storage units on node(s).
5377 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5380 def CheckArguments(self):
5381 _CheckOutputFields(static=self._FIELDS_STATIC,
5382 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5383 selected=self.op.output_fields)
5385 def ExpandNames(self):
5386 self.share_locks = _ShareAll()
5387 self.needed_locks = {}
5390 self.needed_locks[locking.LEVEL_NODE] = \
5391 _GetWantedNodes(self, self.op.nodes)
5393 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5395 def Exec(self, feedback_fn):
5396 """Computes the list of nodes and their attributes.
5399 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5401 # Always get name to sort by
5402 if constants.SF_NAME in self.op.output_fields:
5403 fields = self.op.output_fields[:]
5405 fields = [constants.SF_NAME] + self.op.output_fields
5407 # Never ask for node or type as it's only known to the LU
5408 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5409 while extra in fields:
5410 fields.remove(extra)
5412 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5413 name_idx = field_idx[constants.SF_NAME]
5415 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5416 data = self.rpc.call_storage_list(self.nodes,
5417 self.op.storage_type, st_args,
5418 self.op.name, fields)
5422 for node in utils.NiceSort(self.nodes):
5423 nresult = data[node]
5427 msg = nresult.fail_msg
5429 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5432 rows = dict([(row[name_idx], row) for row in nresult.payload])
5434 for name in utils.NiceSort(rows.keys()):
5439 for field in self.op.output_fields:
5440 if field == constants.SF_NODE:
5442 elif field == constants.SF_TYPE:
5443 val = self.op.storage_type
5444 elif field in field_idx:
5445 val = row[field_idx[field]]
5447 raise errors.ParameterError(field)
5456 class _InstanceQuery(_QueryBase):
5457 FIELDS = query.INSTANCE_FIELDS
5459 def ExpandNames(self, lu):
5460 lu.needed_locks = {}
5461 lu.share_locks = _ShareAll()
5464 self.wanted = _GetWantedInstances(lu, self.names)
5466 self.wanted = locking.ALL_SET
5468 self.do_locking = (self.use_locking and
5469 query.IQ_LIVE in self.requested_data)
5471 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5472 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5473 lu.needed_locks[locking.LEVEL_NODE] = []
5474 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5476 self.do_grouplocks = (self.do_locking and
5477 query.IQ_NODES in self.requested_data)
5479 def DeclareLocks(self, lu, level):
5481 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5482 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5484 # Lock all groups used by instances optimistically; this requires going
5485 # via the node before it's locked, requiring verification later on
5486 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5488 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5489 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5490 elif level == locking.LEVEL_NODE:
5491 lu._LockInstancesNodes() # pylint: disable=W0212
5494 def _CheckGroupLocks(lu):
5495 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5496 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5498 # Check if node groups for locked instances are still correct
5499 for instance_name in owned_instances:
5500 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5502 def _GetQueryData(self, lu):
5503 """Computes the list of instances and their attributes.
5506 if self.do_grouplocks:
5507 self._CheckGroupLocks(lu)
5509 cluster = lu.cfg.GetClusterInfo()
5510 all_info = lu.cfg.GetAllInstancesInfo()
5512 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5514 instance_list = [all_info[name] for name in instance_names]
5515 nodes = frozenset(itertools.chain(*(inst.all_nodes
5516 for inst in instance_list)))
5517 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5520 wrongnode_inst = set()
5522 # Gather data as requested
5523 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5525 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5527 result = node_data[name]
5529 # offline nodes will be in both lists
5530 assert result.fail_msg
5531 offline_nodes.append(name)
5533 bad_nodes.append(name)
5534 elif result.payload:
5535 for inst in result.payload:
5536 if inst in all_info:
5537 if all_info[inst].primary_node == name:
5538 live_data.update(result.payload)
5540 wrongnode_inst.add(inst)
5542 # orphan instance; we don't list it here as we don't
5543 # handle this case yet in the output of instance listing
5544 logging.warning("Orphan instance '%s' found on node %s",
5546 # else no instance is alive
5550 if query.IQ_DISKUSAGE in self.requested_data:
5551 gmi = ganeti.masterd.instance
5552 disk_usage = dict((inst.name,
5553 gmi.ComputeDiskSize(inst.disk_template,
5554 [{constants.IDISK_SIZE: disk.size}
5555 for disk in inst.disks]))
5556 for inst in instance_list)
5560 if query.IQ_CONSOLE in self.requested_data:
5562 for inst in instance_list:
5563 if inst.name in live_data:
5564 # Instance is running
5565 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5567 consinfo[inst.name] = None
5568 assert set(consinfo.keys()) == set(instance_names)
5572 if query.IQ_NODES in self.requested_data:
5573 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5575 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5576 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5577 for uuid in set(map(operator.attrgetter("group"),
5583 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5584 disk_usage, offline_nodes, bad_nodes,
5585 live_data, wrongnode_inst, consinfo,
5589 class LUQuery(NoHooksLU):
5590 """Query for resources/items of a certain kind.
5593 # pylint: disable=W0142
5596 def CheckArguments(self):
5597 qcls = _GetQueryImplementation(self.op.what)
5599 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5601 def ExpandNames(self):
5602 self.impl.ExpandNames(self)
5604 def DeclareLocks(self, level):
5605 self.impl.DeclareLocks(self, level)
5607 def Exec(self, feedback_fn):
5608 return self.impl.NewStyleQuery(self)
5611 class LUQueryFields(NoHooksLU):
5612 """Query for resources/items of a certain kind.
5615 # pylint: disable=W0142
5618 def CheckArguments(self):
5619 self.qcls = _GetQueryImplementation(self.op.what)
5621 def ExpandNames(self):
5622 self.needed_locks = {}
5624 def Exec(self, feedback_fn):
5625 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5628 class LUNodeModifyStorage(NoHooksLU):
5629 """Logical unit for modifying a storage volume on a node.
5634 def CheckArguments(self):
5635 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5637 storage_type = self.op.storage_type
5640 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5642 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5643 " modified" % storage_type,
5646 diff = set(self.op.changes.keys()) - modifiable
5648 raise errors.OpPrereqError("The following fields can not be modified for"
5649 " storage units of type '%s': %r" %
5650 (storage_type, list(diff)),
5653 def ExpandNames(self):
5654 self.needed_locks = {
5655 locking.LEVEL_NODE: self.op.node_name,
5658 def Exec(self, feedback_fn):
5659 """Computes the list of nodes and their attributes.
5662 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5663 result = self.rpc.call_storage_modify(self.op.node_name,
5664 self.op.storage_type, st_args,
5665 self.op.name, self.op.changes)
5666 result.Raise("Failed to modify storage unit '%s' on %s" %
5667 (self.op.name, self.op.node_name))
5670 class LUNodeAdd(LogicalUnit):
5671 """Logical unit for adding node to the cluster.
5675 HTYPE = constants.HTYPE_NODE
5676 _NFLAGS = ["master_capable", "vm_capable"]
5678 def CheckArguments(self):
5679 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5680 # validate/normalize the node name
5681 self.hostname = netutils.GetHostname(name=self.op.node_name,
5682 family=self.primary_ip_family)
5683 self.op.node_name = self.hostname.name
5685 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5686 raise errors.OpPrereqError("Cannot readd the master node",
5689 if self.op.readd and self.op.group:
5690 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5691 " being readded", errors.ECODE_INVAL)
5693 def BuildHooksEnv(self):
5696 This will run on all nodes before, and on all nodes + the new node after.
5700 "OP_TARGET": self.op.node_name,
5701 "NODE_NAME": self.op.node_name,
5702 "NODE_PIP": self.op.primary_ip,
5703 "NODE_SIP": self.op.secondary_ip,
5704 "MASTER_CAPABLE": str(self.op.master_capable),
5705 "VM_CAPABLE": str(self.op.vm_capable),
5708 def BuildHooksNodes(self):
5709 """Build hooks nodes.
5712 # Exclude added node
5713 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5714 post_nodes = pre_nodes + [self.op.node_name, ]
5716 return (pre_nodes, post_nodes)
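# Illustrative sketch (hypothetical node names): when adding "node4" to a
# cluster consisting of node1..node3, the tuple above is, up to ordering,
#   pre_nodes == ["node1", "node2", "node3"]            # new node excluded
#   post_nodes == ["node1", "node2", "node3", "node4"]  # new node included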
5718 def CheckPrereq(self):
5719 """Check prerequisites.
5722 - the new node is not already in the config
5724 - its parameters (single/dual homed) match the cluster
5726 Any errors are signaled by raising errors.OpPrereqError.
5730 hostname = self.hostname
5731 node = hostname.name
5732 primary_ip = self.op.primary_ip = hostname.ip
5733 if self.op.secondary_ip is None:
5734 if self.primary_ip_family == netutils.IP6Address.family:
5735 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5736 " IPv4 address must be given as secondary",
5738 self.op.secondary_ip = primary_ip
5740 secondary_ip = self.op.secondary_ip
5741 if not netutils.IP4Address.IsValid(secondary_ip):
5742 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5743 " address" % secondary_ip, errors.ECODE_INVAL)
5745 node_list = cfg.GetNodeList()
5746 if not self.op.readd and node in node_list:
5747 raise errors.OpPrereqError("Node %s is already in the configuration" %
5748 node, errors.ECODE_EXISTS)
5749 elif self.op.readd and node not in node_list:
5750 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5753 self.changed_primary_ip = False
5755 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5756 if self.op.readd and node == existing_node_name:
5757 if existing_node.secondary_ip != secondary_ip:
5758 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5759 " address configuration as before",
5761 if existing_node.primary_ip != primary_ip:
5762 self.changed_primary_ip = True
5766 if (existing_node.primary_ip == primary_ip or
5767 existing_node.secondary_ip == primary_ip or
5768 existing_node.primary_ip == secondary_ip or
5769 existing_node.secondary_ip == secondary_ip):
5770 raise errors.OpPrereqError("New node ip address(es) conflict with"
5771 " existing node %s" % existing_node.name,
5772 errors.ECODE_NOTUNIQUE)
5774 # After this 'if' block, None is no longer a valid value for the
5775 # _capable op attributes
5777 old_node = self.cfg.GetNodeInfo(node)
5778 assert old_node is not None, "Can't retrieve locked node %s" % node
5779 for attr in self._NFLAGS:
5780 if getattr(self.op, attr) is None:
5781 setattr(self.op, attr, getattr(old_node, attr))
5783 for attr in self._NFLAGS:
5784 if getattr(self.op, attr) is None:
5785 setattr(self.op, attr, True)
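# Clarifying note on the two loops above: on a re-add, any master_capable /
# vm_capable flag left unspecified in the opcode is inherited from the existing
# node object; on a fresh add, unspecified flags simply default to True.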
5787 if self.op.readd and not self.op.vm_capable:
5788 pri, sec = cfg.GetNodeInstances(node)
5790 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5791 " flag set to false, but it already holds"
5792 " instances" % node,
5795 # check that the type of the node (single versus dual homed) is the
5796 # same as for the master
5797 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5798 master_singlehomed = myself.secondary_ip == myself.primary_ip
5799 newbie_singlehomed = secondary_ip == primary_ip
5800 if master_singlehomed != newbie_singlehomed:
5801 if master_singlehomed:
5802 raise errors.OpPrereqError("The master has no secondary ip but the"
5803 " new node has one",
5806 raise errors.OpPrereqError("The master has a secondary ip but the"
5807 " new node doesn't have one",
5810 # checks reachability
5811 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5812 raise errors.OpPrereqError("Node not reachable by ping",
5813 errors.ECODE_ENVIRON)
5815 if not newbie_singlehomed:
5816 # check reachability from my secondary ip to newbie's secondary ip
5817 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5818 source=myself.secondary_ip):
5819 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5820 " based ping to node daemon port",
5821 errors.ECODE_ENVIRON)
5828 if self.op.master_capable:
5829 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5831 self.master_candidate = False
5834 self.new_node = old_node
5836 node_group = cfg.LookupNodeGroup(self.op.group)
5837 self.new_node = objects.Node(name=node,
5838 primary_ip=primary_ip,
5839 secondary_ip=secondary_ip,
5840 master_candidate=self.master_candidate,
5841 offline=False, drained=False,
5844 if self.op.ndparams:
5845 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5847 if self.op.hv_state:
5848 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5850 if self.op.disk_state:
5851 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5853 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5854 # it a property on the base class.
5855 result = rpc.DnsOnlyRunner().call_version([node])[node]
5856 result.Raise("Can't get version information from node %s" % node)
5857 if constants.PROTOCOL_VERSION == result.payload:
5858 logging.info("Communication to node %s fine, sw version %s matches",
5859 node, result.payload)
5861 raise errors.OpPrereqError("Version mismatch master version %s,"
5862 " node version %s" %
5863 (constants.PROTOCOL_VERSION, result.payload),
5864 errors.ECODE_ENVIRON)
5866 def Exec(self, feedback_fn):
5867 """Adds the new node to the cluster.
5870 new_node = self.new_node
5871 node = new_node.name
5873 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5876 # We are adding a new node, so we assume it's powered
5877 new_node.powered = True
5879 # for re-adds, reset the offline/drained/master-candidate flags;
5880 # we need to reset here, otherwise offline would prevent RPC calls
5881 # later in the procedure; this also means that if the re-add
5882 # fails, we are left with a non-offlined, broken node
5884 new_node.drained = new_node.offline = False # pylint: disable=W0201
5885 self.LogInfo("Readding a node, the offline/drained flags were reset")
5886 # if we demote the node, we do cleanup later in the procedure
5887 new_node.master_candidate = self.master_candidate
5888 if self.changed_primary_ip:
5889 new_node.primary_ip = self.op.primary_ip
5891 # copy the master/vm_capable flags
5892 for attr in self._NFLAGS:
5893 setattr(new_node, attr, getattr(self.op, attr))
5895 # notify the user about any possible mc promotion
5896 if new_node.master_candidate:
5897 self.LogInfo("Node will be a master candidate")
5899 if self.op.ndparams:
5900 new_node.ndparams = self.op.ndparams
5902 new_node.ndparams = {}
5904 if self.op.hv_state:
5905 new_node.hv_state_static = self.new_hv_state
5907 if self.op.disk_state:
5908 new_node.disk_state_static = self.new_disk_state
5910 # Add node to our /etc/hosts, and add key to known_hosts
5911 if self.cfg.GetClusterInfo().modify_etc_hosts:
5912 master_node = self.cfg.GetMasterNode()
5913 result = self.rpc.call_etc_hosts_modify(master_node,
5914 constants.ETC_HOSTS_ADD,
5917 result.Raise("Can't update hosts file with new host data")
5919 if new_node.secondary_ip != new_node.primary_ip:
5920 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5923 node_verify_list = [self.cfg.GetMasterNode()]
5924 node_verify_param = {
5925 constants.NV_NODELIST: ([node], {}),
5926 # TODO: do a node-net-test as well?
5929 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5930 self.cfg.GetClusterName())
5931 for verifier in node_verify_list:
5932 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5933 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5935 for failed in nl_payload:
5936 feedback_fn("ssh/hostname verification failed"
5937 " (checking from %s): %s" %
5938 (verifier, nl_payload[failed]))
5939 raise errors.OpExecError("ssh/hostname verification failed")
5942 _RedistributeAncillaryFiles(self)
5943 self.context.ReaddNode(new_node)
5944 # make sure we redistribute the config
5945 self.cfg.Update(new_node, feedback_fn)
5946 # and make sure the new node will not have old files around
5947 if not new_node.master_candidate:
5948 result = self.rpc.call_node_demote_from_mc(new_node.name)
5949 msg = result.fail_msg
5951 self.LogWarning("Node failed to demote itself from master"
5952 " candidate status: %s" % msg)
5954 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5955 additional_vm=self.op.vm_capable)
5956 self.context.AddNode(new_node, self.proc.GetECId())
5959 class LUNodeSetParams(LogicalUnit):
5960 """Modifies the parameters of a node.
5962 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5963 to the node role (as _ROLE_*)
5964 @cvar _R2F: a dictionary from node role to tuples of flags
5965 @cvar _FLAGS: a list of attribute names corresponding to the flags
5968 HPATH = "node-modify"
5969 HTYPE = constants.HTYPE_NODE
5971 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5973 (True, False, False): _ROLE_CANDIDATE,
5974 (False, True, False): _ROLE_DRAINED,
5975 (False, False, True): _ROLE_OFFLINE,
5976 (False, False, False): _ROLE_REGULAR,
5978 _R2F = dict((v, k) for k, v in _F2R.items())
5979 _FLAGS = ["master_candidate", "drained", "offline"]
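# Illustrative sketch of the role mapping: a drained node carries the flag
# tuple (master_candidate=False, drained=True, offline=False), so
#   _F2R[(False, True, False)] == _ROLE_DRAINED
# and the inverse mapping is used when applying a new role in Exec:
#   _R2F[_ROLE_DRAINED] == (False, True, False)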
5981 def CheckArguments(self):
5982 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5983 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5984 self.op.master_capable, self.op.vm_capable,
5985 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5987 if all_mods.count(None) == len(all_mods):
5988 raise errors.OpPrereqError("Please pass at least one modification",
5990 if all_mods.count(True) > 1:
5991 raise errors.OpPrereqError("Can't set the node into more than one"
5992 " state at the same time",
5995 # Boolean value that tells us whether we might be demoting from MC
5996 self.might_demote = (self.op.master_candidate is False or
5997 self.op.offline is True or
5998 self.op.drained is True or
5999 self.op.master_capable is False)
6001 if self.op.secondary_ip:
6002 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6003 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6004 " address" % self.op.secondary_ip,
6007 self.lock_all = self.op.auto_promote and self.might_demote
6008 self.lock_instances = self.op.secondary_ip is not None
6010 def _InstanceFilter(self, instance):
6011 """Filter for getting affected instances.
6014 return (instance.disk_template in constants.DTS_INT_MIRROR and
6015 self.op.node_name in instance.all_nodes)
6017 def ExpandNames(self):
6019 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
6021 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
6023 # Since modifying a node can have severe effects on currently running
6024 # operations the resource lock is at least acquired in shared mode
6025 self.needed_locks[locking.LEVEL_NODE_RES] = \
6026 self.needed_locks[locking.LEVEL_NODE]
6028 # Get node resource and instance locks in shared mode; they are not used
6029 # for anything but read-only access
6030 self.share_locks[locking.LEVEL_NODE_RES] = 1
6031 self.share_locks[locking.LEVEL_INSTANCE] = 1
6033 if self.lock_instances:
6034 self.needed_locks[locking.LEVEL_INSTANCE] = \
6035 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6037 def BuildHooksEnv(self):
6040 This runs on the master node.
6044 "OP_TARGET": self.op.node_name,
6045 "MASTER_CANDIDATE": str(self.op.master_candidate),
6046 "OFFLINE": str(self.op.offline),
6047 "DRAINED": str(self.op.drained),
6048 "MASTER_CAPABLE": str(self.op.master_capable),
6049 "VM_CAPABLE": str(self.op.vm_capable),
6052 def BuildHooksNodes(self):
6053 """Build hooks nodes.
6056 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6059 def CheckPrereq(self):
6060 """Check prerequisites.
6062 This only checks the instance list against the existing names.
6065 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6067 if self.lock_instances:
6068 affected_instances = \
6069 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6071 # Verify instance locks
6072 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6073 wanted_instances = frozenset(affected_instances.keys())
6074 if wanted_instances - owned_instances:
6075 raise errors.OpPrereqError("Instances affected by changing node %s's"
6076 " secondary IP address have changed since"
6077 " locks were acquired, wanted '%s', have"
6078 " '%s'; retry the operation" %
6080 utils.CommaJoin(wanted_instances),
6081 utils.CommaJoin(owned_instances)),
6084 affected_instances = None
6086 if (self.op.master_candidate is not None or
6087 self.op.drained is not None or
6088 self.op.offline is not None):
6089 # we can't change the master's node flags
6090 if self.op.node_name == self.cfg.GetMasterNode():
6091 raise errors.OpPrereqError("The master role can be changed"
6092 " only via master-failover",
6095 if self.op.master_candidate and not node.master_capable:
6096 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6097 " it a master candidate" % node.name,
6100 if self.op.vm_capable is False:
6101 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6103 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6104 " the vm_capable flag" % node.name,
6107 if node.master_candidate and self.might_demote and not self.lock_all:
6108 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6109 # check if after removing the current node, we're missing master
6111 (mc_remaining, mc_should, _) = \
6112 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6113 if mc_remaining < mc_should:
6114 raise errors.OpPrereqError("Not enough master candidates, please"
6115 " pass auto promote option to allow"
6116 " promotion (--auto-promote or RAPI"
6117 " auto_promote=True)", errors.ECODE_STATE)
6119 self.old_flags = old_flags = (node.master_candidate,
6120 node.drained, node.offline)
6121 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6122 self.old_role = old_role = self._F2R[old_flags]
6124 # Check for ineffective changes
6125 for attr in self._FLAGS:
6126 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6127 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6128 setattr(self.op, attr, None)
6130 # Past this point, any flag change to False means a transition
6131 # away from the respective state, as only real changes are kept
6133 # TODO: We might query the real power state if it supports OOB
6134 if _SupportsOob(self.cfg, node):
6135 if self.op.offline is False and not (node.powered or
6136 self.op.powered is True):
6137 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6138 " offline status can be reset") %
6139 self.op.node_name, errors.ECODE_STATE)
6140 elif self.op.powered is not None:
6141 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6142 " as it does not support out-of-band"
6143 " handling") % self.op.node_name,
6146 # If we're being deofflined/drained, we'll MC ourself if needed
6147 if (self.op.drained is False or self.op.offline is False or
6148 (self.op.master_capable and not node.master_capable)):
6149 if _DecideSelfPromotion(self):
6150 self.op.master_candidate = True
6151 self.LogInfo("Auto-promoting node to master candidate")
6153 # If we're no longer master capable, we'll demote ourselves from MC
6154 if self.op.master_capable is False and node.master_candidate:
6155 self.LogInfo("Demoting from master candidate")
6156 self.op.master_candidate = False
6159 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6160 if self.op.master_candidate:
6161 new_role = self._ROLE_CANDIDATE
6162 elif self.op.drained:
6163 new_role = self._ROLE_DRAINED
6164 elif self.op.offline:
6165 new_role = self._ROLE_OFFLINE
6166 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6167 # False is still in new flags, which means we're un-setting (the
6169 new_role = self._ROLE_REGULAR
6170 else: # no new flags, nothing, keep old role
6173 self.new_role = new_role
6175 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6176 # Trying to transition out of offline status
6177 result = self.rpc.call_version([node.name])[node.name]
6179 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6180 " to report its version: %s" %
6181 (node.name, result.fail_msg),
6184 self.LogWarning("Transitioning node from offline to online state"
6185 " without using re-add. Please make sure the node"
6188 # When changing the secondary ip, verify if this is a single-homed to
6189 # multi-homed transition or vice versa, and apply the relevant
6191 if self.op.secondary_ip:
6192 # Ok even without locking, because this can't be changed by any LU
6193 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6194 master_singlehomed = master.secondary_ip == master.primary_ip
6195 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6196 if self.op.force and node.name == master.name:
6197 self.LogWarning("Transitioning from single-homed to multi-homed"
6198 " cluster. All nodes will require a secondary ip.")
6200 raise errors.OpPrereqError("Changing the secondary ip on a"
6201 " single-homed cluster requires the"
6202 " --force option to be passed, and the"
6203 " target node to be the master",
6205 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6206 if self.op.force and node.name == master.name:
6207 self.LogWarning("Transitioning from multi-homed to single-homed"
6208 " cluster. Secondary IPs will have to be removed.")
6210 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6211 " same as the primary IP on a multi-homed"
6212 " cluster, unless the --force option is"
6213 " passed, and the target node is the"
6214 " master", errors.ECODE_INVAL)
6216 assert not (frozenset(affected_instances) -
6217 self.owned_locks(locking.LEVEL_INSTANCE))
6220 if affected_instances:
6221 msg = ("Cannot change secondary IP address: offline node has"
6222 " instances (%s) configured to use it" %
6223 utils.CommaJoin(affected_instances.keys()))
6224 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6226 # On online nodes, check that no instances are running, and that
6227 # the node has the new ip and we can reach it.
6228 for instance in affected_instances.values():
6229 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6230 msg="cannot change secondary ip")
6232 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6233 if master.name != node.name:
6234 # check reachability from master secondary ip to new secondary ip
6235 if not netutils.TcpPing(self.op.secondary_ip,
6236 constants.DEFAULT_NODED_PORT,
6237 source=master.secondary_ip):
6238 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6239 " based ping to node daemon port",
6240 errors.ECODE_ENVIRON)
6242 if self.op.ndparams:
6243 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6244 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6245 self.new_ndparams = new_ndparams
6247 if self.op.hv_state:
6248 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6249 self.node.hv_state_static)
6251 if self.op.disk_state:
6252 self.new_disk_state = \
6253 _MergeAndVerifyDiskState(self.op.disk_state,
6254 self.node.disk_state_static)
6256 def Exec(self, feedback_fn):
6261 old_role = self.old_role
6262 new_role = self.new_role
6266 if self.op.ndparams:
6267 node.ndparams = self.new_ndparams
6269 if self.op.powered is not None:
6270 node.powered = self.op.powered
6272 if self.op.hv_state:
6273 node.hv_state_static = self.new_hv_state
6275 if self.op.disk_state:
6276 node.disk_state_static = self.new_disk_state
6278 for attr in ["master_capable", "vm_capable"]:
6279 val = getattr(self.op, attr)
6281 setattr(node, attr, val)
6282 result.append((attr, str(val)))
6284 if new_role != old_role:
6285 # Tell the node to demote itself, if no longer MC and not offline
6286 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6287 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6289 self.LogWarning("Node failed to demote itself: %s", msg)
6291 new_flags = self._R2F[new_role]
6292 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6294 result.append((desc, str(nf)))
6295 (node.master_candidate, node.drained, node.offline) = new_flags
6297 # we locked all nodes, we adjust the CP before updating this node
6299 _AdjustCandidatePool(self, [node.name])
6301 if self.op.secondary_ip:
6302 node.secondary_ip = self.op.secondary_ip
6303 result.append(("secondary_ip", self.op.secondary_ip))
6305 # this will trigger configuration file update, if needed
6306 self.cfg.Update(node, feedback_fn)
6308 # this will trigger job queue propagation or cleanup if the mc
6310 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6311 self.context.ReaddNode(node)
6316 class LUNodePowercycle(NoHooksLU):
6317 """Powercycles a node.
6322 def CheckArguments(self):
6323 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6324 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6325 raise errors.OpPrereqError("The node is the master and the force"
6326 " parameter was not set",
6329 def ExpandNames(self):
6330 """Locking for PowercycleNode.
6332 This is a last-resort option and shouldn't block on other
6333 jobs. Therefore, we grab no locks.
6336 self.needed_locks = {}
6338 def Exec(self, feedback_fn):
6342 result = self.rpc.call_node_powercycle(self.op.node_name,
6343 self.cfg.GetHypervisorType())
6344 result.Raise("Failed to schedule the reboot")
6345 return result.payload
6348 class LUClusterQuery(NoHooksLU):
6349 """Query cluster configuration.
6354 def ExpandNames(self):
6355 self.needed_locks = {}
6357 def Exec(self, feedback_fn):
6358 """Return cluster config.
6361 cluster = self.cfg.GetClusterInfo()
6364 # Filter just for enabled hypervisors
6365 for os_name, hv_dict in cluster.os_hvp.items():
6366 os_hvp[os_name] = {}
6367 for hv_name, hv_params in hv_dict.items():
6368 if hv_name in cluster.enabled_hypervisors:
6369 os_hvp[os_name][hv_name] = hv_params
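# Illustrative sketch (hypothetical data): with
#   cluster.os_hvp == {"debian": {"kvm": {...}, "xen-pvm": {...}}}
# and only "kvm" in cluster.enabled_hypervisors, the filtered result is
#   os_hvp == {"debian": {"kvm": {...}}}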
6371 # Convert ip_family to ip_version
6372 primary_ip_version = constants.IP4_VERSION
6373 if cluster.primary_ip_family == netutils.IP6Address.family:
6374 primary_ip_version = constants.IP6_VERSION
6377 "software_version": constants.RELEASE_VERSION,
6378 "protocol_version": constants.PROTOCOL_VERSION,
6379 "config_version": constants.CONFIG_VERSION,
6380 "os_api_version": max(constants.OS_API_VERSIONS),
6381 "export_version": constants.EXPORT_VERSION,
6382 "architecture": runtime.GetArchInfo(),
6383 "name": cluster.cluster_name,
6384 "master": cluster.master_node,
6385 "default_hypervisor": cluster.primary_hypervisor,
6386 "enabled_hypervisors": cluster.enabled_hypervisors,
6387 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6388 for hypervisor_name in cluster.enabled_hypervisors]),
6390 "beparams": cluster.beparams,
6391 "osparams": cluster.osparams,
6392 "ipolicy": cluster.ipolicy,
6393 "nicparams": cluster.nicparams,
6394 "ndparams": cluster.ndparams,
6395 "diskparams": cluster.diskparams,
6396 "candidate_pool_size": cluster.candidate_pool_size,
6397 "master_netdev": cluster.master_netdev,
6398 "master_netmask": cluster.master_netmask,
6399 "use_external_mip_script": cluster.use_external_mip_script,
6400 "volume_group_name": cluster.volume_group_name,
6401 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6402 "file_storage_dir": cluster.file_storage_dir,
6403 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6404 "maintain_node_health": cluster.maintain_node_health,
6405 "ctime": cluster.ctime,
6406 "mtime": cluster.mtime,
6407 "uuid": cluster.uuid,
6408 "tags": list(cluster.GetTags()),
6409 "uid_pool": cluster.uid_pool,
6410 "default_iallocator": cluster.default_iallocator,
6411 "reserved_lvs": cluster.reserved_lvs,
6412 "primary_ip_version": primary_ip_version,
6413 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6414 "hidden_os": cluster.hidden_os,
6415 "blacklisted_os": cluster.blacklisted_os,
6421 class LUClusterConfigQuery(NoHooksLU):
6422 """Return configuration values.
6427 def CheckArguments(self):
6428 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6430 def ExpandNames(self):
6431 self.cq.ExpandNames(self)
6433 def DeclareLocks(self, level):
6434 self.cq.DeclareLocks(self, level)
6436 def Exec(self, feedback_fn):
6437 result = self.cq.OldStyleQuery(self)
6439 assert len(result) == 1
6444 class _ClusterQuery(_QueryBase):
6445 FIELDS = query.CLUSTER_FIELDS
6447 #: Do not sort (there is only one item)
6450 def ExpandNames(self, lu):
6451 lu.needed_locks = {}
6453 # The following variables interact with _QueryBase._GetNames
6454 self.wanted = locking.ALL_SET
6455 self.do_locking = self.use_locking
6458 raise errors.OpPrereqError("Can not use locking for cluster queries",
6461 def DeclareLocks(self, lu, level):
6464 def _GetQueryData(self, lu):
6465 """Computes the list of nodes and their attributes.
6468 # Locking is not used
6469 assert not (compat.any(lu.glm.is_owned(level)
6470 for level in locking.LEVELS
6471 if level != locking.LEVEL_CLUSTER) or
6472 self.do_locking or self.use_locking)
6474 if query.CQ_CONFIG in self.requested_data:
6475 cluster = lu.cfg.GetClusterInfo()
6477 cluster = NotImplemented
6479 if query.CQ_QUEUE_DRAINED in self.requested_data:
6480 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6482 drain_flag = NotImplemented
6484 if query.CQ_WATCHER_PAUSE in self.requested_data:
6485 watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6487 watcher_pause = NotImplemented
6489 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6492 class LUInstanceActivateDisks(NoHooksLU):
6493 """Bring up an instance's disks.
6498 def ExpandNames(self):
6499 self._ExpandAndLockInstance()
6500 self.needed_locks[locking.LEVEL_NODE] = []
6501 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6503 def DeclareLocks(self, level):
6504 if level == locking.LEVEL_NODE:
6505 self._LockInstancesNodes()
6507 def CheckPrereq(self):
6508 """Check prerequisites.
6510 This checks that the instance is in the cluster.
6513 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6514 assert self.instance is not None, \
6515 "Cannot retrieve locked instance %s" % self.op.instance_name
6516 _CheckNodeOnline(self, self.instance.primary_node)
6518 def Exec(self, feedback_fn):
6519 """Activate the disks.
6522 disks_ok, disks_info = \
6523 _AssembleInstanceDisks(self, self.instance,
6524 ignore_size=self.op.ignore_size)
6526 raise errors.OpExecError("Cannot activate block devices")
6528 if self.op.wait_for_sync:
6529 if not _WaitForSync(self, self.instance):
6530 raise errors.OpExecError("Some disks of the instance are degraded!")
6535 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6537 """Prepare the block devices for an instance.
6539 This sets up the block devices on all nodes.
6541 @type lu: L{LogicalUnit}
6542 @param lu: the logical unit on whose behalf we execute
6543 @type instance: L{objects.Instance}
6544 @param instance: the instance for whose disks we assemble
6545 @type disks: list of L{objects.Disk} or None
6546 @param disks: which disks to assemble (or all, if None)
6547 @type ignore_secondaries: boolean
6548 @param ignore_secondaries: if true, errors on secondary nodes
6549 won't result in an error return from the function
6550 @type ignore_size: boolean
6551 @param ignore_size: if true, the current known size of the disk
6552 will not be used during the disk activation, useful for cases
6553 when the size is wrong
6554 @return: False if the operation failed, otherwise a list of
6555 (host, instance_visible_name, node_visible_name)
6556 with the mapping from node devices to instance devices
6561 iname = instance.name
6562 disks = _ExpandCheckDisks(instance, disks)
6564 # With the two-pass mechanism we try to reduce the window of
6565 # opportunity for the race condition of switching DRBD to primary
6566 # before handshaking has occurred, but we do not eliminate it
6568 # The proper fix would be to wait (with some limits) until the
6569 # connection has been made and drbd transitions from WFConnection
6570 # into any other network-connected state (Connected, SyncTarget,
6573 # 1st pass, assemble on all nodes in secondary mode
6574 for idx, inst_disk in enumerate(disks):
6575 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6577 node_disk = node_disk.Copy()
6578 node_disk.UnsetSize()
6579 lu.cfg.SetDiskID(node_disk, node)
6580 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6582 msg = result.fail_msg
6584 is_offline_secondary = (node in instance.secondary_nodes and
6586 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6587 " (is_primary=False, pass=1): %s",
6588 inst_disk.iv_name, node, msg)
6589 if not (ignore_secondaries or is_offline_secondary):
6592 # FIXME: race condition on drbd migration to primary
6594 # 2nd pass, do only the primary node
6595 for idx, inst_disk in enumerate(disks):
6598 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6599 if node != instance.primary_node:
6602 node_disk = node_disk.Copy()
6603 node_disk.UnsetSize()
6604 lu.cfg.SetDiskID(node_disk, node)
6605 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6607 msg = result.fail_msg
6609 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6610 " (is_primary=True, pass=2): %s",
6611 inst_disk.iv_name, node, msg)
6614 dev_path = result.payload
6616 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6618 # leave the disks configured for the primary node
6619 # this is a workaround that would be better fixed by
6620 # improving the logical/physical id handling
6622 lu.cfg.SetDiskID(disk, instance.primary_node)
6624 return disks_ok, device_info
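# Illustrative sketch (hypothetical names): on success the second return value
# lists the primary node's view of each disk, e.g.
#   [("node1.example.com", "disk/0", "/dev/drbd0")]
# i.e. (primary node, instance-visible disk name, node-visible device path).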
6627 def _StartInstanceDisks(lu, instance, force):
6628 """Start the disks of an instance.
6631 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6632 ignore_secondaries=force)
6634 _ShutdownInstanceDisks(lu, instance)
6635 if force is not None and not force:
6636 lu.proc.LogWarning("", hint="If the message above refers to a"
6638 " you can retry the operation using '--force'.")
6639 raise errors.OpExecError("Disk consistency error")
6642 class LUInstanceDeactivateDisks(NoHooksLU):
6643 """Shutdown an instance's disks.
6648 def ExpandNames(self):
6649 self._ExpandAndLockInstance()
6650 self.needed_locks[locking.LEVEL_NODE] = []
6651 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6653 def DeclareLocks(self, level):
6654 if level == locking.LEVEL_NODE:
6655 self._LockInstancesNodes()
6657 def CheckPrereq(self):
6658 """Check prerequisites.
6660 This checks that the instance is in the cluster.
6663 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6664 assert self.instance is not None, \
6665 "Cannot retrieve locked instance %s" % self.op.instance_name
6667 def Exec(self, feedback_fn):
6668 """Deactivate the disks
6671 instance = self.instance
6673 _ShutdownInstanceDisks(self, instance)
6675 _SafeShutdownInstanceDisks(self, instance)
6678 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6679 """Shutdown block devices of an instance.
6681 This function checks if an instance is running, before calling
6682 _ShutdownInstanceDisks.
6685 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6686 _ShutdownInstanceDisks(lu, instance, disks=disks)
6689 def _ExpandCheckDisks(instance, disks):
6690 """Return the instance disks selected by the disks list
6692 @type disks: list of L{objects.Disk} or None
6693 @param disks: selected disks
6694 @rtype: list of L{objects.Disk}
6695 @return: selected instance disks to act on
6699 return instance.disks
6701 if not set(disks).issubset(instance.disks):
6702 raise errors.ProgrammerError("Can only act on disks belonging to the"
6707 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6708 """Shutdown block devices of an instance.
6710 This does the shutdown on all nodes of the instance.
6712 If the ignore_primary is false, errors on the primary node are
6717 disks = _ExpandCheckDisks(instance, disks)
6720 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6721 lu.cfg.SetDiskID(top_disk, node)
6722 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6723 msg = result.fail_msg
6725 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6726 disk.iv_name, node, msg)
6727 if ((node == instance.primary_node and not ignore_primary) or
6728 (node != instance.primary_node and not result.offline)):
6733 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6734 """Checks if a node has enough free memory.
6736 This function checks if a given node has the needed amount of free
6737 memory. In case the node has less memory or we cannot get the
6738 information from the node, this function raises an OpPrereqError
6741 @type lu: C{LogicalUnit}
6742 @param lu: a logical unit from which we get configuration data
6744 @param node: the node to check
6745 @type reason: C{str}
6746 @param reason: string to use in the error message
6747 @type requested: C{int}
6748 @param requested: the amount of memory in MiB to check for
6749 @type hypervisor_name: C{str}
6750 @param hypervisor_name: the hypervisor to ask for memory stats
6752 @return: node current free memory
6753 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6754 we cannot check the node
6757 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6758 nodeinfo[node].Raise("Can't get data from node %s" % node,
6759 prereq=True, ecode=errors.ECODE_ENVIRON)
6760 (_, _, (hv_info, )) = nodeinfo[node].payload
6762 free_mem = hv_info.get("memory_free", None)
6763 if not isinstance(free_mem, int):
6764 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6765 " was '%s'" % (node, free_mem),
6766 errors.ECODE_ENVIRON)
6767 if requested > free_mem:
6768 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6769 " needed %s MiB, available %s MiB" %
6770 (node, reason, requested, free_mem),
6775 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6776 """Checks if nodes have enough free disk space in the all VGs.
6778 This function check if all given nodes have the needed amount of
6779 free disk. In case any node has less disk or we cannot get the
6780 information from the node, this function raise an OpPrereqError
6783 @type lu: C{LogicalUnit}
6784 @param lu: a logical unit from which we get configuration data
6785 @type nodenames: C{list}
6786 @param nodenames: the list of node names to check
6787 @type req_sizes: C{dict}
6788 @param req_sizes: the hash of vg and corresponding amount of disk in
6790 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6791 or we cannot check the node
6794 for vg, req_size in req_sizes.items():
6795 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
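# Minimal usage sketch (hypothetical VG name and size): a call such as
#   _CheckNodesFreeDiskPerVG(self, nodenames, {"xenvg": 10240})
# requires 10 GiB of free space in volume group "xenvg" on every listed node,
# delegating the per-VG check to _CheckNodesFreeDiskOnVG below.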
6798 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6799 """Checks if nodes have enough free disk space in the specified VG.
6801 This function checks if all given nodes have the needed amount of
6802 free disk. In case any node has less disk or we cannot get the
6803 information from the node, this function raises an OpPrereqError
6806 @type lu: C{LogicalUnit}
6807 @param lu: a logical unit from which we get configuration data
6808 @type nodenames: C{list}
6809 @param nodenames: the list of node names to check
6811 @param vg: the volume group to check
6812 @type requested: C{int}
6813 @param requested: the amount of disk in MiB to check for
6814 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6815 or we cannot check the node
6818 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6819 for node in nodenames:
6820 info = nodeinfo[node]
6821 info.Raise("Cannot get current information from node %s" % node,
6822 prereq=True, ecode=errors.ECODE_ENVIRON)
6823 (_, (vg_info, ), _) = info.payload
6824 vg_free = vg_info.get("vg_free", None)
6825 if not isinstance(vg_free, int):
6826 raise errors.OpPrereqError("Can't compute free disk space on node"
6827 " %s for vg %s, result was '%s'" %
6828 (node, vg, vg_free), errors.ECODE_ENVIRON)
6829 if requested > vg_free:
6830 raise errors.OpPrereqError("Not enough disk space on target node %s"
6831 " vg %s: required %d MiB, available %d MiB" %
6832 (node, vg, requested, vg_free),
6836 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6837 """Checks if nodes have enough physical CPUs
6839 This function checks if all given nodes have the needed number of
6840 physical CPUs. In case any node has fewer CPUs or we cannot get the
6841 information from the node, this function raises an OpPrereqError
6844 @type lu: C{LogicalUnit}
6845 @param lu: a logical unit from which we get configuration data
6846 @type nodenames: C{list}
6847 @param nodenames: the list of node names to check
6848 @type requested: C{int}
6849 @param requested: the minimum acceptable number of physical CPUs
6850 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6851 or we cannot check the node
6854 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6855 for node in nodenames:
6856 info = nodeinfo[node]
6857 info.Raise("Cannot get current information from node %s" % node,
6858 prereq=True, ecode=errors.ECODE_ENVIRON)
6859 (_, _, (hv_info, )) = info.payload
6860 num_cpus = hv_info.get("cpu_total", None)
6861 if not isinstance(num_cpus, int):
6862 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6863 " on node %s, result was '%s'" %
6864 (node, num_cpus), errors.ECODE_ENVIRON)
6865 if requested > num_cpus:
6866 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6867 "required" % (node, num_cpus, requested),
6871 class LUInstanceStartup(LogicalUnit):
6872 """Starts an instance.
6875 HPATH = "instance-start"
6876 HTYPE = constants.HTYPE_INSTANCE
6879 def CheckArguments(self):
6881 if self.op.beparams:
6882 # fill the beparams dict
6883 objects.UpgradeBeParams(self.op.beparams)
6884 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6886 def ExpandNames(self):
6887 self._ExpandAndLockInstance()
6888 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6890 def DeclareLocks(self, level):
6891 if level == locking.LEVEL_NODE_RES:
6892 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6894 def BuildHooksEnv(self):
6897 This runs on master, primary and secondary nodes of the instance.
6901 "FORCE": self.op.force,
6904 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6908 def BuildHooksNodes(self):
6909 """Build hooks nodes.
6912 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6915 def CheckPrereq(self):
6916 """Check prerequisites.
6918 This checks that the instance is in the cluster.
6921 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6922 assert self.instance is not None, \
6923 "Cannot retrieve locked instance %s" % self.op.instance_name
6926 if self.op.hvparams:
6927 # check hypervisor parameter syntax (locally)
6928 cluster = self.cfg.GetClusterInfo()
6929 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6930 filled_hvp = cluster.FillHV(instance)
6931 filled_hvp.update(self.op.hvparams)
6932 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6933 hv_type.CheckParameterSyntax(filled_hvp)
6934 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6936 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6938 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6940 if self.primary_offline and self.op.ignore_offline_nodes:
6941 self.proc.LogWarning("Ignoring offline primary node")
6943 if self.op.hvparams or self.op.beparams:
6944 self.proc.LogWarning("Overridden parameters are ignored")
6946 _CheckNodeOnline(self, instance.primary_node)
6948 bep = self.cfg.GetClusterInfo().FillBE(instance)
6949 bep.update(self.op.beparams)
6951 # check bridges existence
6952 _CheckInstanceBridgesExist(self, instance)
6954 remote_info = self.rpc.call_instance_info(instance.primary_node,
6956 instance.hypervisor)
6957 remote_info.Raise("Error checking node %s" % instance.primary_node,
6958 prereq=True, ecode=errors.ECODE_ENVIRON)
6959 if not remote_info.payload: # not running already
6960 _CheckNodeFreeMemory(self, instance.primary_node,
6961 "starting instance %s" % instance.name,
6962 bep[constants.BE_MINMEM], instance.hypervisor)
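# Note on the check above: the amount verified is the instance's minimum
# memory (BE_MINMEM), i.e. the smallest amount the instance is allowed to run
# with, and the check is skipped when the instance is already running on the
# node (remote_info.payload is non-empty).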
6964 def Exec(self, feedback_fn):
6965 """Start the instance.
6968 instance = self.instance
6969 force = self.op.force
6971 if not self.op.no_remember:
6972 self.cfg.MarkInstanceUp(instance.name)
6974 if self.primary_offline:
6975 assert self.op.ignore_offline_nodes
6976 self.proc.LogInfo("Primary node offline, marked instance as started")
6978 node_current = instance.primary_node
6980 _StartInstanceDisks(self, instance, force)
6983 self.rpc.call_instance_start(node_current,
6984 (instance, self.op.hvparams,
6986 self.op.startup_paused)
6987 msg = result.fail_msg
6989 _ShutdownInstanceDisks(self, instance)
6990 raise errors.OpExecError("Could not start instance: %s" % msg)
6993 class LUInstanceReboot(LogicalUnit):
6994 """Reboot an instance.
6997 HPATH = "instance-reboot"
6998 HTYPE = constants.HTYPE_INSTANCE
7001 def ExpandNames(self):
7002 self._ExpandAndLockInstance()
7004 def BuildHooksEnv(self):
7007 This runs on master, primary and secondary nodes of the instance.
7011 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7012 "REBOOT_TYPE": self.op.reboot_type,
7013 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7016 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7020 def BuildHooksNodes(self):
7021 """Build hooks nodes.
7024 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7027 def CheckPrereq(self):
7028 """Check prerequisites.
7030 This checks that the instance is in the cluster.
7033 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7034 assert self.instance is not None, \
7035 "Cannot retrieve locked instance %s" % self.op.instance_name
7036 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7037 _CheckNodeOnline(self, instance.primary_node)
7039 # check bridges existence
7040 _CheckInstanceBridgesExist(self, instance)
7042 def Exec(self, feedback_fn):
7043 """Reboot the instance.
7046 instance = self.instance
7047 ignore_secondaries = self.op.ignore_secondaries
7048 reboot_type = self.op.reboot_type
7050 remote_info = self.rpc.call_instance_info(instance.primary_node,
7052 instance.hypervisor)
7053 remote_info.Raise("Error checking node %s" % instance.primary_node)
7054 instance_running = bool(remote_info.payload)
7056 node_current = instance.primary_node
7058 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7059 constants.INSTANCE_REBOOT_HARD]:
7060 for disk in instance.disks:
7061 self.cfg.SetDiskID(disk, node_current)
7062 result = self.rpc.call_instance_reboot(node_current, instance,
7064 self.op.shutdown_timeout)
7065 result.Raise("Could not reboot instance")
7067 if instance_running:
7068 result = self.rpc.call_instance_shutdown(node_current, instance,
7069 self.op.shutdown_timeout)
7070 result.Raise("Could not shutdown instance for full reboot")
7071 _ShutdownInstanceDisks(self, instance)
7073 self.LogInfo("Instance %s was already stopped, starting now",
7075 _StartInstanceDisks(self, instance, ignore_secondaries)
7076 result = self.rpc.call_instance_start(node_current,
7077 (instance, None, None), False)
7078 msg = result.fail_msg
7080 _ShutdownInstanceDisks(self, instance)
7081 raise errors.OpExecError("Could not start instance for"
7082 " full reboot: %s" % msg)
7084 self.cfg.MarkInstanceUp(instance.name)
7087 class LUInstanceShutdown(LogicalUnit):
7088 """Shutdown an instance.
7091 HPATH = "instance-stop"
7092 HTYPE = constants.HTYPE_INSTANCE
7095 def ExpandNames(self):
7096 self._ExpandAndLockInstance()
7098 def BuildHooksEnv(self):
7101 This runs on master, primary and secondary nodes of the instance.
7104 env = _BuildInstanceHookEnvByObject(self, self.instance)
7105 env["TIMEOUT"] = self.op.timeout
7108 def BuildHooksNodes(self):
7109 """Build hooks nodes.
7112 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7115 def CheckPrereq(self):
7116 """Check prerequisites.
7118 This checks that the instance is in the cluster.
7121 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7122 assert self.instance is not None, \
7123 "Cannot retrieve locked instance %s" % self.op.instance_name
7125 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7127 self.primary_offline = \
7128 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7130 if self.primary_offline and self.op.ignore_offline_nodes:
7131 self.proc.LogWarning("Ignoring offline primary node")
7133 _CheckNodeOnline(self, self.instance.primary_node)
7135 def Exec(self, feedback_fn):
7136 """Shutdown the instance.
7139 instance = self.instance
7140 node_current = instance.primary_node
7141 timeout = self.op.timeout
7143 if not self.op.no_remember:
7144 self.cfg.MarkInstanceDown(instance.name)
7146 if self.primary_offline:
7147 assert self.op.ignore_offline_nodes
7148 self.proc.LogInfo("Primary node offline, marked instance as stopped")
7150 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7151 msg = result.fail_msg
7153 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
7155 _ShutdownInstanceDisks(self, instance)
7158 class LUInstanceReinstall(LogicalUnit):
7159 """Reinstall an instance.
7162 HPATH = "instance-reinstall"
7163 HTYPE = constants.HTYPE_INSTANCE
7166 def ExpandNames(self):
7167 self._ExpandAndLockInstance()
7169 def BuildHooksEnv(self):
7172 This runs on master, primary and secondary nodes of the instance.
7175 return _BuildInstanceHookEnvByObject(self, self.instance)
7177 def BuildHooksNodes(self):
7178 """Build hooks nodes.
7181 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7184 def CheckPrereq(self):
7185 """Check prerequisites.
7187 This checks that the instance is in the cluster and is not running.
7190 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7191 assert instance is not None, \
7192 "Cannot retrieve locked instance %s" % self.op.instance_name
7193 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7194 " offline, cannot reinstall")
7196 if instance.disk_template == constants.DT_DISKLESS:
7197 raise errors.OpPrereqError("Instance '%s' has no disks" %
7198 self.op.instance_name,
7200 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7202 if self.op.os_type is not None:
7204 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7205 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7206 instance_os = self.op.os_type
7208 instance_os = instance.os
7210 nodelist = list(instance.all_nodes)
7212 if self.op.osparams:
7213 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7214 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7215 self.os_inst = i_osdict # the new dict (without defaults)
7219 self.instance = instance
7221 def Exec(self, feedback_fn):
7222 """Reinstall the instance.
7225 inst = self.instance
7227 if self.op.os_type is not None:
7228 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7229 inst.os = self.op.os_type
7230 # Write to configuration
7231 self.cfg.Update(inst, feedback_fn)
7233 _StartInstanceDisks(self, inst, None)
7235 feedback_fn("Running the instance OS create scripts...")
7236 # FIXME: pass debug option from opcode to backend
7237 result = self.rpc.call_instance_os_add(inst.primary_node,
7238 (inst, self.os_inst), True,
7239 self.op.debug_level)
7240 result.Raise("Could not install OS for instance %s on node %s" %
7241 (inst.name, inst.primary_node))
7243 _ShutdownInstanceDisks(self, inst)
7246 class LUInstanceRecreateDisks(LogicalUnit):
7247 """Recreate an instance's missing disks.
7250 HPATH = "instance-recreate-disks"
7251 HTYPE = constants.HTYPE_INSTANCE
7254 _MODIFYABLE = frozenset([
7255 constants.IDISK_SIZE,
7256 constants.IDISK_MODE,
7259 # New or changed disk parameters may have different semantics
7260 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7261 constants.IDISK_ADOPT,
7263 # TODO: Implement support changing VG while recreating
7265 constants.IDISK_METAVG,
7268 def _RunAllocator(self):
7269 """Run the allocator based on input opcode.
7272 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7275 # The allocator should actually run in "relocate" mode, but current
7276 # allocators don't support relocating all the nodes of an instance at
7277 # the same time. As a workaround we use "allocate" mode, but this is
7278 # suboptimal for two reasons:
7279 # - The instance name passed to the allocator is present in the list of
7280 # existing instances, so there could be a conflict within the
7281 # internal structures of the allocator. This doesn't happen with the
7282 # current allocators, but it's a liability.
7283 # - The allocator counts the resources used by the instance twice: once
7284 # because the instance exists already, and once because it tries to
7285 # allocate a new instance.
7286 # The allocator could choose some of the nodes on which the instance is
7287 # running, but that's not a problem. If the instance nodes are broken,
7288 # they should already be marked as drained or offline, and hence
7289 # skipped by the allocator. If instance disks have been lost for other
7290 # reasons, then recreating the disks on the same nodes should be fine.
7291 disk_template = self.instance.disk_template
7292 spindle_use = be_full[constants.BE_SPINDLE_USE]
7293 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7294 disk_template=disk_template,
7295 tags=list(self.instance.GetTags()),
7296 os=self.instance.os,
7298 vcpus=be_full[constants.BE_VCPUS],
7299 memory=be_full[constants.BE_MAXMEM],
7300 spindle_use=spindle_use,
7301 disks=[{constants.IDISK_SIZE: d.size,
7302 constants.IDISK_MODE: d.mode}
7303 for d in self.instance.disks],
7304 hypervisor=self.instance.hypervisor)
7305 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7307 ial.Run(self.op.iallocator)
7309 assert req.RequiredNodes() == len(self.instance.all_nodes)
7312 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7313 " %s" % (self.op.iallocator, ial.info),
7316 self.op.nodes = ial.result
7317 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7318 self.op.instance_name, self.op.iallocator,
7319 utils.CommaJoin(ial.result))
7321 def CheckArguments(self):
7322 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7323 # Normalize and convert deprecated list of disk indices
7324 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
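# Illustrative sketch (hypothetical input): a deprecated plain index list such
# as op.disks = [2, 0] is normalized above to
#   [(0, {}), (2, {})]
# i.e. sorted (index, params) pairs with empty per-disk parameter overrides.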
7326 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7328 raise errors.OpPrereqError("Some disks have been specified more than"
7329 " once: %s" % utils.CommaJoin(duplicates),
7332 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7333 # when neither iallocator nor nodes are specified
7334 if self.op.iallocator or self.op.nodes:
7335 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7337 for (idx, params) in self.op.disks:
7338 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7339 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7341 raise errors.OpPrereqError("Parameters for disk %s try to change"
7342 " unmodifyable parameter(s): %s" %
7343 (idx, utils.CommaJoin(unsupported)),
7346 def ExpandNames(self):
7347 self._ExpandAndLockInstance()
7348 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7350 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7351 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7353 self.needed_locks[locking.LEVEL_NODE] = []
7354 if self.op.iallocator:
7355 # iallocator will select a new node in the same group
7356 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7357 self.needed_locks[locking.LEVEL_NODE_RES] = []
7359 def DeclareLocks(self, level):
7360 if level == locking.LEVEL_NODEGROUP:
7361 assert self.op.iallocator is not None
7362 assert not self.op.nodes
7363 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7364 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7365 # Lock the primary group used by the instance optimistically; this
7366 # requires going via the node before it's locked, requiring
7367 # verification later on
7368 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7369 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7371 elif level == locking.LEVEL_NODE:
7372 # If an allocator is used, then we lock all the nodes in the current
7373 # instance group, as we don't know yet which ones will be selected;
7374 # if we replace the nodes without using an allocator, locks are
7375 # already declared in ExpandNames; otherwise, we need to lock all the
7376 # instance nodes for disk re-creation
7377 if self.op.iallocator:
7378 assert not self.op.nodes
7379 assert not self.needed_locks[locking.LEVEL_NODE]
7380 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7382 # Lock member nodes of the group of the primary node
7383 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7384 self.needed_locks[locking.LEVEL_NODE].extend(
7385 self.cfg.GetNodeGroup(group_uuid).members)
7386 elif not self.op.nodes:
7387 self._LockInstancesNodes(primary_only=False)
7388 elif level == locking.LEVEL_NODE_RES:
7390 self.needed_locks[locking.LEVEL_NODE_RES] = \
7391 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7393 def BuildHooksEnv(self):
7396 This runs on master, primary and secondary nodes of the instance.
7399 return _BuildInstanceHookEnvByObject(self, self.instance)
7401 def BuildHooksNodes(self):
7402 """Build hooks nodes.
7405 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7408 def CheckPrereq(self):
7409 """Check prerequisites.
7411 This checks that the instance is in the cluster and is not running.
7414 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7415 assert instance is not None, \
7416 "Cannot retrieve locked instance %s" % self.op.instance_name
7418 if len(self.op.nodes) != len(instance.all_nodes):
7419 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7420 " %d replacement nodes were specified" %
7421 (instance.name, len(instance.all_nodes),
7422 len(self.op.nodes)),
7424 assert instance.disk_template != constants.DT_DRBD8 or \
7425 len(self.op.nodes) == 2
7426 assert instance.disk_template != constants.DT_PLAIN or \
7427 len(self.op.nodes) == 1
7428 primary_node = self.op.nodes[0]
7430 primary_node = instance.primary_node
7431 if not self.op.iallocator:
7432 _CheckNodeOnline(self, primary_node)
7434 if instance.disk_template == constants.DT_DISKLESS:
7435 raise errors.OpPrereqError("Instance '%s' has no disks" %
7436 self.op.instance_name, errors.ECODE_INVAL)
7438 # Verify if node group locks are still correct
7439 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7441 # Node group locks are acquired only for the primary node (and only
7442 # when the allocator is used)
7443 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7446 # if we replace nodes *and* the old primary is offline, we don't
7447 # check the instance state
7448 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7449 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7450 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7451 msg="cannot recreate disks")
7454 self.disks = dict(self.op.disks)
7456 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7458 maxidx = max(self.disks.keys())
7459 if maxidx >= len(instance.disks):
7460 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7463 if ((self.op.nodes or self.op.iallocator) and
7464 sorted(self.disks.keys()) != range(len(instance.disks))):
7465 raise errors.OpPrereqError("Can't recreate disks partially and"
7466 " change the nodes at the same time",
7469 self.instance = instance
7471 if self.op.iallocator:
7472 self._RunAllocator()
7473 # Release unneeded node and node resource locks
7474 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7475 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7477 def Exec(self, feedback_fn):
7478 """Recreate the disks.
7481 instance = self.instance
7483 assert (self.owned_locks(locking.LEVEL_NODE) ==
7484 self.owned_locks(locking.LEVEL_NODE_RES))
7487 mods = [] # keeps track of needed changes
7489 for idx, disk in enumerate(instance.disks):
7491 changes = self.disks[idx]
7493 # Disk should not be recreated
7497 # update secondaries for disks, if needed
7498 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7499 # need to update the nodes and minors
7500 assert len(self.op.nodes) == 2
7501 assert len(disk.logical_id) == 6 # otherwise disk internals
7503 (_, _, old_port, _, _, old_secret) = disk.logical_id
7504 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7505 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7506 new_minors[0], new_minors[1], old_secret)
7507 assert len(disk.logical_id) == len(new_id)
7511 mods.append((idx, new_id, changes))
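# For reference, a DRBD8 disk's logical_id is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, secret); only the node names and
# the minors are replaced here, the port and shared secret are kept. Sketch
# with hypothetical values (not taken from the original code):
#   old: ("node1.example.com", "node2.example.com", 11000, 0, 1, "secret")
#   new: ("node3.example.com", "node4.example.com", 11000, 2, 3, "secret")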
7513 # now that we have passed all asserts above, we can apply the mods
7514 # in a single run (to avoid partial changes)
7515 for idx, new_id, changes in mods:
7516 disk = instance.disks[idx]
7517 if new_id is not None:
7518 assert disk.dev_type == constants.LD_DRBD8
7519 disk.logical_id = new_id
7521 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7522 mode=changes.get(constants.IDISK_MODE, None))
7524 # change primary node, if needed
7526 instance.primary_node = self.op.nodes[0]
7527 self.LogWarning("Changing the instance's nodes, you will have to"
7528 " remove any disks left on the older nodes manually")
7531 self.cfg.Update(instance, feedback_fn)
7533 # All touched nodes must be locked
7534 mylocks = self.owned_locks(locking.LEVEL_NODE)
7535 assert mylocks.issuperset(frozenset(instance.all_nodes))
7536 _CreateDisks(self, instance, to_skip=to_skip)
7539 class LUInstanceRename(LogicalUnit):
7540 """Rename an instance.
7543 HPATH = "instance-rename"
7544 HTYPE = constants.HTYPE_INSTANCE
7546 def CheckArguments(self):
7550 if self.op.ip_check and not self.op.name_check:
7551 # TODO: make the ip check more flexible and not depend on the name check
7552 raise errors.OpPrereqError("IP address check requires a name check",
7555 def BuildHooksEnv(self):
7558 This runs on master, primary and secondary nodes of the instance.
7561 env = _BuildInstanceHookEnvByObject(self, self.instance)
7562 env["INSTANCE_NEW_NAME"] = self.op.new_name
7565 def BuildHooksNodes(self):
7566 """Build hooks nodes.
7569 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7572 def CheckPrereq(self):
7573 """Check prerequisites.
7575 This checks that the instance is in the cluster and is not running.
7578 self.op.instance_name = _ExpandInstanceName(self.cfg,
7579 self.op.instance_name)
7580 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7581 assert instance is not None
7582 _CheckNodeOnline(self, instance.primary_node)
7583 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7584 msg="cannot rename")
7585 self.instance = instance
7587 new_name = self.op.new_name
7588 if self.op.name_check:
7589 hostname = _CheckHostnameSane(self, new_name)
7590 new_name = self.op.new_name = hostname.name
7591 if (self.op.ip_check and
7592 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7593 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7594 (hostname.ip, new_name),
7595 errors.ECODE_NOTUNIQUE)
7597 instance_list = self.cfg.GetInstanceList()
7598 if new_name in instance_list and new_name != instance.name:
7599 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7600 new_name, errors.ECODE_EXISTS)
7602 def Exec(self, feedback_fn):
7603 """Rename the instance.
7606 inst = self.instance
7607 old_name = inst.name
7609 rename_file_storage = False
7610 if (inst.disk_template in constants.DTS_FILEBASED and
7611 self.op.new_name != inst.name):
7612 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7613 rename_file_storage = True
7615 self.cfg.RenameInstance(inst.name, self.op.new_name)
7616 # Change the instance lock. This is definitely safe while we hold the BGL.
7617 # Otherwise the new lock would have to be added in acquired mode.
7619 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7620 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7622 # re-read the instance from the configuration after rename
7623 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7625 if rename_file_storage:
7626 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7627 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7628 old_file_storage_dir,
7629 new_file_storage_dir)
7630 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7631 " (but the instance has been renamed in Ganeti)" %
7632 (inst.primary_node, old_file_storage_dir,
7633 new_file_storage_dir))
7635 _StartInstanceDisks(self, inst, None)
7636 # update info on disks
7637 info = _GetInstanceInfoText(inst)
7638 for (idx, disk) in enumerate(inst.disks):
7639 for node in inst.all_nodes:
7640 self.cfg.SetDiskID(disk, node)
7641 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7643 self.LogWarning("Error setting info on node %s for disk %s: %s",
7644 node, idx, result.fail_msg)
7646 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7647 old_name, self.op.debug_level)
7648 msg = result.fail_msg
7650 msg = ("Could not run OS rename script for instance %s on node %s"
7651 " (but the instance has been renamed in Ganeti): %s" %
7652 (inst.name, inst.primary_node, msg))
7653 self.proc.LogWarning(msg)
7655 _ShutdownInstanceDisks(self, inst)
7660 class LUInstanceRemove(LogicalUnit):
7661 """Remove an instance.
7664 HPATH = "instance-remove"
7665 HTYPE = constants.HTYPE_INSTANCE
7668 def ExpandNames(self):
7669 self._ExpandAndLockInstance()
7670 self.needed_locks[locking.LEVEL_NODE] = []
7671 self.needed_locks[locking.LEVEL_NODE_RES] = []
7672 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7674 def DeclareLocks(self, level):
7675 if level == locking.LEVEL_NODE:
7676 self._LockInstancesNodes()
7677 elif level == locking.LEVEL_NODE_RES:
7679 self.needed_locks[locking.LEVEL_NODE_RES] = \
7680 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7682 def BuildHooksEnv(self):
7685 This runs on master, primary and secondary nodes of the instance.
7688 env = _BuildInstanceHookEnvByObject(self, self.instance)
7689 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7692 def BuildHooksNodes(self):
7693 """Build hooks nodes.
7696 nl = [self.cfg.GetMasterNode()]
7697 nl_post = list(self.instance.all_nodes) + nl
7698 return (nl, nl_post)
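# (nl, nl_post) are the pre- and post-hook node lists: for instance removal
# the pre-hooks run only on the master node, while the post-hooks also run on
# the instance's former nodes (master plus old primary and secondaries).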
7700 def CheckPrereq(self):
7701 """Check prerequisites.
7703 This checks that the instance is in the cluster.
7706 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7707 assert self.instance is not None, \
7708 "Cannot retrieve locked instance %s" % self.op.instance_name
7710 def Exec(self, feedback_fn):
7711 """Remove the instance.
7714 instance = self.instance
7715 logging.info("Shutting down instance %s on node %s",
7716 instance.name, instance.primary_node)
7718 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7719 self.op.shutdown_timeout)
7720 msg = result.fail_msg
7722 if self.op.ignore_failures:
7723 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7725 raise errors.OpExecError("Could not shutdown instance %s on"
7727 (instance.name, instance.primary_node, msg))
7729 assert (self.owned_locks(locking.LEVEL_NODE) ==
7730 self.owned_locks(locking.LEVEL_NODE_RES))
7731 assert not (set(instance.all_nodes) -
7732 self.owned_locks(locking.LEVEL_NODE)), \
7733 "Not owning correct locks"
7735 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7738 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7739 """Utility function to remove an instance.
7742 logging.info("Removing block devices for instance %s", instance.name)
7744 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7745 if not ignore_failures:
7746 raise errors.OpExecError("Can't remove instance's disks")
7747 feedback_fn("Warning: can't remove instance's disks")
7749 logging.info("Removing instance %s out of cluster config", instance.name)
7751 lu.cfg.RemoveInstance(instance.name)
7753 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7754 "Instance lock removal conflict"
7756 # Remove lock for the instance
7757 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7760 class LUInstanceQuery(NoHooksLU):
7761 """Logical unit for querying instances.
7764 # pylint: disable=W0142
7767 def CheckArguments(self):
7768 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7769 self.op.output_fields, self.op.use_locking)
7771 def ExpandNames(self):
7772 self.iq.ExpandNames(self)
7774 def DeclareLocks(self, level):
7775 self.iq.DeclareLocks(self, level)
7777 def Exec(self, feedback_fn):
7778 return self.iq.OldStyleQuery(self)
7781 def _ExpandNamesForMigration(lu):
7782 """Expands names for use with L{TLMigrateInstance}.
7784 @type lu: L{LogicalUnit}
7787 if lu.op.target_node is not None:
7788 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
7790 lu.needed_locks[locking.LEVEL_NODE] = []
7791 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7793 lu.needed_locks[locking.LEVEL_NODE_RES] = []
7794 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7797 def _DeclareLocksForMigration(lu, level):
7798 """Declares locks for L{TLMigrateInstance}.
7800 @type lu: L{LogicalUnit}
7801 @param level: Lock level
7804 if level == locking.LEVEL_NODE:
7805 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
7806 if instance.disk_template in constants.DTS_EXT_MIRROR:
7807 if lu.op.target_node is None:
7808 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7810 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7812 del lu.recalculate_locks[locking.LEVEL_NODE]
7814 lu._LockInstancesNodes() # pylint: disable=W0212
7815 elif level == locking.LEVEL_NODE_RES:
7817 lu.needed_locks[locking.LEVEL_NODE_RES] = \
7818 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
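# In short: externally mirrored templates (DTS_EXT_MIRROR) may migrate to any
# node, so without an explicit target_node every node lock is requested
# (locking.ALL_SET); internally mirrored templates such as DRBD can only move
# between the instance's own nodes, which _LockInstancesNodes() locks.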
7821 class LUInstanceFailover(LogicalUnit):
7822 """Failover an instance.
7825 HPATH = "instance-failover"
7826 HTYPE = constants.HTYPE_INSTANCE
7829 def CheckArguments(self):
7830 """Check the arguments.
7833 self.iallocator = getattr(self.op, "iallocator", None)
7834 self.target_node = getattr(self.op, "target_node", None)
7836 def ExpandNames(self):
7837 self._ExpandAndLockInstance()
7838 _ExpandNamesForMigration(self)
7841 TLMigrateInstance(self, self.op.instance_name, False, True, False,
7842 self.op.ignore_consistency, True,
7843 self.op.shutdown_timeout, self.op.ignore_ipolicy)
7845 self.tasklets = [self._migrater]
7847 def DeclareLocks(self, level):
7848 _DeclareLocksForMigration(self, level)
7850 def BuildHooksEnv(self):
7853 This runs on master, primary and secondary nodes of the instance.
7856 instance = self._migrater.instance
7857 source_node = instance.primary_node
7858 target_node = self.op.target_node
7860 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7861 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7862 "OLD_PRIMARY": source_node,
7863 "NEW_PRIMARY": target_node,
7866 if instance.disk_template in constants.DTS_INT_MIRROR:
7867 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7868 env["NEW_SECONDARY"] = source_node
7870 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7872 env.update(_BuildInstanceHookEnvByObject(self, instance))
7876 def BuildHooksNodes(self):
7877 """Build hooks nodes.
7880 instance = self._migrater.instance
7881 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7882 return (nl, nl + [instance.primary_node])
7885 class LUInstanceMigrate(LogicalUnit):
7886 """Migrate an instance.
7888 This is migration without shutting down, compared to the failover,
7889 which is done with shutdown.
7892 HPATH = "instance-migrate"
7893 HTYPE = constants.HTYPE_INSTANCE
7896 def ExpandNames(self):
7897 self._ExpandAndLockInstance()
7898 _ExpandNamesForMigration(self)
7901 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
7902 False, self.op.allow_failover, False,
7903 self.op.allow_runtime_changes,
7904 constants.DEFAULT_SHUTDOWN_TIMEOUT,
7905 self.op.ignore_ipolicy)
7907 self.tasklets = [self._migrater]
7909 def DeclareLocks(self, level):
7910 _DeclareLocksForMigration(self, level)
7912 def BuildHooksEnv(self):
7915 This runs on master, primary and secondary nodes of the instance.
7918 instance = self._migrater.instance
7919 source_node = instance.primary_node
7920 target_node = self.op.target_node
7921 env = _BuildInstanceHookEnvByObject(self, instance)
7923 "MIGRATE_LIVE": self._migrater.live,
7924 "MIGRATE_CLEANUP": self.op.cleanup,
7925 "OLD_PRIMARY": source_node,
7926 "NEW_PRIMARY": target_node,
7927 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7930 if instance.disk_template in constants.DTS_INT_MIRROR:
7931 env["OLD_SECONDARY"] = target_node
7932 env["NEW_SECONDARY"] = source_node
7934 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7938 def BuildHooksNodes(self):
7939 """Build hooks nodes.
7942 instance = self._migrater.instance
7943 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7944 return (nl, nl + [instance.primary_node])
7947 class LUInstanceMove(LogicalUnit):
7948 """Move an instance by data-copying.
7951 HPATH = "instance-move"
7952 HTYPE = constants.HTYPE_INSTANCE
7955 def ExpandNames(self):
7956 self._ExpandAndLockInstance()
7957 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7958 self.op.target_node = target_node
7959 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7960 self.needed_locks[locking.LEVEL_NODE_RES] = []
7961 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7963 def DeclareLocks(self, level):
7964 if level == locking.LEVEL_NODE:
7965 self._LockInstancesNodes(primary_only=True)
7966 elif level == locking.LEVEL_NODE_RES:
7968 self.needed_locks[locking.LEVEL_NODE_RES] = \
7969 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7971 def BuildHooksEnv(self):
7974 This runs on master, primary and secondary nodes of the instance.
7978 "TARGET_NODE": self.op.target_node,
7979 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7981 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7984 def BuildHooksNodes(self):
7985 """Build hooks nodes.
7989 self.cfg.GetMasterNode(),
7990 self.instance.primary_node,
7991 self.op.target_node,
7995 def CheckPrereq(self):
7996 """Check prerequisites.
7998 This checks that the instance is in the cluster.
8001 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8002 assert self.instance is not None, \
8003 "Cannot retrieve locked instance %s" % self.op.instance_name
8005 node = self.cfg.GetNodeInfo(self.op.target_node)
8006 assert node is not None, \
8007 "Cannot retrieve locked node %s" % self.op.target_node
8009 self.target_node = target_node = node.name
8011 if target_node == instance.primary_node:
8012 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8013 (instance.name, target_node),
8016 bep = self.cfg.GetClusterInfo().FillBE(instance)
8018 for idx, dsk in enumerate(instance.disks):
8019 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8020 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8021 " cannot copy" % idx, errors.ECODE_STATE)
8023 _CheckNodeOnline(self, target_node)
8024 _CheckNodeNotDrained(self, target_node)
8025 _CheckNodeVmCapable(self, target_node)
8026 cluster = self.cfg.GetClusterInfo()
8027 group_info = self.cfg.GetNodeGroup(node.group)
8028 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8029 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8030 ignore=self.op.ignore_ipolicy)
8032 if instance.admin_state == constants.ADMINST_UP:
8033 # check memory requirements on the secondary node
8034 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8035 instance.name, bep[constants.BE_MAXMEM],
8036 instance.hypervisor)
8038 self.LogInfo("Not checking memory on the secondary node as"
8039 " instance will not be started")
8041 # check bridge existence
8042 _CheckInstanceBridgesExist(self, instance, node=target_node)
8044 def Exec(self, feedback_fn):
8045 """Move an instance.
8047 The move is done by shutting it down on its present node, copying
8048 the data over (slow) and starting it on the new node.
8051 instance = self.instance
8053 source_node = instance.primary_node
8054 target_node = self.target_node
8056 self.LogInfo("Shutting down instance %s on source node %s",
8057 instance.name, source_node)
8059 assert (self.owned_locks(locking.LEVEL_NODE) ==
8060 self.owned_locks(locking.LEVEL_NODE_RES))
8062 result = self.rpc.call_instance_shutdown(source_node, instance,
8063 self.op.shutdown_timeout)
8064 msg = result.fail_msg
8066 if self.op.ignore_consistency:
8067 self.proc.LogWarning("Could not shutdown instance %s on node %s."
8068 " Proceeding anyway. Please make sure node"
8069 " %s is down. Error details: %s",
8070 instance.name, source_node, source_node, msg)
8072 raise errors.OpExecError("Could not shutdown instance %s on"
8074 (instance.name, source_node, msg))
8076 # create the target disks
8078 _CreateDisks(self, instance, target_node=target_node)
8079 except errors.OpExecError:
8080 self.LogWarning("Device creation failed, reverting...")
8082 _RemoveDisks(self, instance, target_node=target_node)
8084 self.cfg.ReleaseDRBDMinors(instance.name)
8087 cluster_name = self.cfg.GetClusterInfo().cluster_name
8090 # activate, get path, copy the data over
8091 for idx, disk in enumerate(instance.disks):
8092 self.LogInfo("Copying data for disk %d", idx)
8093 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8094 instance.name, True, idx)
8096 self.LogWarning("Can't assemble newly created disk %d: %s",
8097 idx, result.fail_msg)
8098 errs.append(result.fail_msg)
8100 dev_path = result.payload
8101 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8102 target_node, dev_path,
8105 self.LogWarning("Can't copy data over for disk %d: %s",
8106 idx, result.fail_msg)
8107 errs.append(result.fail_msg)
8111 self.LogWarning("Some disks failed to copy, aborting")
8113 _RemoveDisks(self, instance, target_node=target_node)
8115 self.cfg.ReleaseDRBDMinors(instance.name)
8116 raise errors.OpExecError("Errors during disk copy: %s" %
8119 instance.primary_node = target_node
8120 self.cfg.Update(instance, feedback_fn)
8122 self.LogInfo("Removing the disks on the original node")
8123 _RemoveDisks(self, instance, target_node=source_node)
8125 # Only start the instance if it's marked as up
8126 if instance.admin_state == constants.ADMINST_UP:
8127 self.LogInfo("Starting instance %s on node %s",
8128 instance.name, target_node)
8130 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8131 ignore_secondaries=True)
8133 _ShutdownInstanceDisks(self, instance)
8134 raise errors.OpExecError("Can't activate the instance's disks")
8136 result = self.rpc.call_instance_start(target_node,
8137 (instance, None, None), False)
8138 msg = result.fail_msg
8140 _ShutdownInstanceDisks(self, instance)
8141 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8142 (instance.name, target_node, msg))
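# Summary of the move sequence implemented in Exec() above: shut the instance
# down on the source node, create empty disks on the target node, assemble
# each new disk and copy its data over via blockdev_export, update
# primary_node in the configuration, remove the disks on the source node and,
# if the instance was marked up, start it again on the target node.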
8145 class LUNodeMigrate(LogicalUnit):
8146 """Migrate all instances from a node.
8149 HPATH = "node-migrate"
8150 HTYPE = constants.HTYPE_NODE
8153 def CheckArguments(self):
8156 def ExpandNames(self):
8157 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8159 self.share_locks = _ShareAll()
8160 self.needed_locks = {
8161 locking.LEVEL_NODE: [self.op.node_name],
8164 def BuildHooksEnv(self):
8167 This runs on the master, the primary and all the secondaries.
8171 "NODE_NAME": self.op.node_name,
8172 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8175 def BuildHooksNodes(self):
8176 """Build hooks nodes.
8179 nl = [self.cfg.GetMasterNode()]
8182 def CheckPrereq(self):
8185 def Exec(self, feedback_fn):
8186 # Prepare jobs for migration instances
8187 allow_runtime_changes = self.op.allow_runtime_changes
8189 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8192 iallocator=self.op.iallocator,
8193 target_node=self.op.target_node,
8194 allow_runtime_changes=allow_runtime_changes,
8195 ignore_ipolicy=self.op.ignore_ipolicy)]
8196 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8198 # TODO: Run iallocator in this opcode and pass correct placement options to
8199 # OpInstanceMigrate. Since other jobs can modify the cluster between
8200 # running the iallocator and the actual migration, a good consistency model
8201 # will have to be found.
8203 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8204 frozenset([self.op.node_name]))
8206 return ResultWithJobs(jobs)
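# Shape of the returned value, with hypothetical instance names: each inner
# list is one job holding a single OpInstanceMigrate opcode, e.g.
#   [[opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#    [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)]]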
8209 class TLMigrateInstance(Tasklet):
8210 """Tasklet class for instance migration.
8213 @ivar live: whether the migration will be done live or non-live;
8214 this variable is initialized only after CheckPrereq has run
8215 @type cleanup: boolean
8216 @ivar cleanup: Whether we clean up from a failed migration
8217 @type iallocator: string
8218 @ivar iallocator: The iallocator used to determine target_node
8219 @type target_node: string
8220 @ivar target_node: If given, the target_node to reallocate the instance to
8221 @type failover: boolean
8222 @ivar failover: Whether operation results in failover or migration
8223 @type fallback: boolean
8224 @ivar fallback: Whether fallback to failover is allowed if migration not
8226 @type ignore_consistency: boolean
8227 @ivar ignore_consistency: Whether we should ignore consistency between source
8229 @type shutdown_timeout: int
8230 @ivar shutdown_timeout: In case of failover, timeout of the shutdown
8231 @type ignore_ipolicy: bool
8232 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8237 _MIGRATION_POLL_INTERVAL = 1 # seconds
8238 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8240 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8241 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8243 """Initializes this class.
8246 Tasklet.__init__(self, lu)
8249 self.instance_name = instance_name
8250 self.cleanup = cleanup
8251 self.live = False # will be overridden later
8252 self.failover = failover
8253 self.fallback = fallback
8254 self.ignore_consistency = ignore_consistency
8255 self.shutdown_timeout = shutdown_timeout
8256 self.ignore_ipolicy = ignore_ipolicy
8257 self.allow_runtime_changes = allow_runtime_changes
8259 def CheckPrereq(self):
8260 """Check prerequisites.
8262 This checks that the instance is in the cluster.
8265 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8266 instance = self.cfg.GetInstanceInfo(instance_name)
8267 assert instance is not None
8268 self.instance = instance
8269 cluster = self.cfg.GetClusterInfo()
8271 if (not self.cleanup and
8272 not instance.admin_state == constants.ADMINST_UP and
8273 not self.failover and self.fallback):
8274 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8275 " switching to failover")
8276 self.failover = True
8278 if instance.disk_template not in constants.DTS_MIRRORED:
8283 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8284 " %s" % (instance.disk_template, text),
8287 if instance.disk_template in constants.DTS_EXT_MIRROR:
8288 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8290 if self.lu.op.iallocator:
8291 self._RunAllocator()
8293 # We set self.target_node as it is required by
8295 self.target_node = self.lu.op.target_node
8297 # Check that the target node is correct in terms of instance policy
8298 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8299 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8300 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8302 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8303 ignore=self.ignore_ipolicy)
8305 # self.target_node is already populated, either directly or by the
8307 target_node = self.target_node
8308 if self.target_node == instance.primary_node:
8309 raise errors.OpPrereqError("Cannot migrate instance %s"
8310 " to its primary (%s)" %
8311 (instance.name, instance.primary_node),
8314 if len(self.lu.tasklets) == 1:
8315 # It is safe to release locks only when we're the only tasklet
8317 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8318 keep=[instance.primary_node, self.target_node])
8321 secondary_nodes = instance.secondary_nodes
8322 if not secondary_nodes:
8323 raise errors.ConfigurationError("No secondary node but using"
8324 " %s disk template" %
8325 instance.disk_template)
8326 target_node = secondary_nodes[0]
8327 if self.lu.op.iallocator or (self.lu.op.target_node and
8328 self.lu.op.target_node != target_node):
8330 text = "failed over"
8333 raise errors.OpPrereqError("Instances with disk template %s cannot"
8334 " be %s to arbitrary nodes"
8335 " (neither an iallocator nor a target"
8336 " node can be passed)" %
8337 (instance.disk_template, text),
8339 nodeinfo = self.cfg.GetNodeInfo(target_node)
8340 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8341 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8343 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8344 ignore=self.ignore_ipolicy)
8346 i_be = cluster.FillBE(instance)
8348 # check memory requirements on the secondary node
8349 if (not self.cleanup and
8350 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8351 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8352 "migrating instance %s" %
8354 i_be[constants.BE_MINMEM],
8355 instance.hypervisor)
8357 self.lu.LogInfo("Not checking memory on the secondary node as"
8358 " instance will not be started")
8360 # check if failover must be forced instead of migration
8361 if (not self.cleanup and not self.failover and
8362 i_be[constants.BE_ALWAYS_FAILOVER]):
8363 self.lu.LogInfo("Instance configured to always failover; fallback"
8365 self.failover = True
8367 # check bridge existence
8368 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8370 if not self.cleanup:
8371 _CheckNodeNotDrained(self.lu, target_node)
8372 if not self.failover:
8373 result = self.rpc.call_instance_migratable(instance.primary_node,
8375 if result.fail_msg and self.fallback:
8376 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8378 self.failover = True
8380 result.Raise("Can't migrate, please use failover",
8381 prereq=True, ecode=errors.ECODE_STATE)
8383 assert not (self.failover and self.cleanup)
8385 if not self.failover:
8386 if self.lu.op.live is not None and self.lu.op.mode is not None:
8387 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8388 " parameters are accepted",
8390 if self.lu.op.live is not None:
8392 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8394 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8395 # reset the 'live' parameter to None so that repeated
8396 # invocations of CheckPrereq do not raise an exception
8397 self.lu.op.live = None
8398 elif self.lu.op.mode is None:
8399 # read the default value from the hypervisor
8400 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8401 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8403 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8405 # Failover is never live
8408 if not (self.failover or self.cleanup):
8409 remote_info = self.rpc.call_instance_info(instance.primary_node,
8411 instance.hypervisor)
8412 remote_info.Raise("Error checking instance on node %s" %
8413 instance.primary_node)
8414 instance_running = bool(remote_info.payload)
8415 if instance_running:
8416 self.current_mem = int(remote_info.payload["memory"])
8418 def _RunAllocator(self):
8419 """Run the allocator based on input opcode.
8422 # FIXME: add a self.ignore_ipolicy option
8423 req = iallocator.IAReqRelocate(name=self.instance_name,
8424 relocate_from=[self.instance.primary_node])
8425 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8427 ial.Run(self.lu.op.iallocator)
8430 raise errors.OpPrereqError("Can't compute nodes using"
8431 " iallocator '%s': %s" %
8432 (self.lu.op.iallocator, ial.info),
8434 self.target_node = ial.result[0]
8435 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8436 self.instance_name, self.lu.op.iallocator,
8437 utils.CommaJoin(ial.result))
8439 def _WaitUntilSync(self):
8440 """Poll with custom rpc for disk sync.
8442 This uses our own step-based rpc call.
8445 self.feedback_fn("* wait until resync is done")
8449 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8451 (self.instance.disks,
8454 for node, nres in result.items():
8455 nres.Raise("Cannot resync disks on node %s" % node)
8456 node_done, node_percent = nres.payload
8457 all_done = all_done and node_done
8458 if node_percent is not None:
8459 min_percent = min(min_percent, node_percent)
8461 if min_percent < 100:
8462 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8465 def _EnsureSecondary(self, node):
8466 """Demote a node to secondary.
8469 self.feedback_fn("* switching node %s to secondary mode" % node)
8471 for dev in self.instance.disks:
8472 self.cfg.SetDiskID(dev, node)
8474 result = self.rpc.call_blockdev_close(node, self.instance.name,
8475 self.instance.disks)
8476 result.Raise("Cannot change disk to secondary on node %s" % node)
8478 def _GoStandalone(self):
8479 """Disconnect from the network.
8482 self.feedback_fn("* changing into standalone mode")
8483 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8484 self.instance.disks)
8485 for node, nres in result.items():
8486 nres.Raise("Cannot disconnect disks on node %s" % node)
8488 def _GoReconnect(self, multimaster):
8489 """Reconnect to the network.
8495 msg = "single-master"
8496 self.feedback_fn("* changing disks into %s mode" % msg)
8497 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8498 (self.instance.disks, self.instance),
8499 self.instance.name, multimaster)
8500 for node, nres in result.items():
8501 nres.Raise("Cannot change disks config on node %s" % node)
8503 def _ExecCleanup(self):
8504 """Try to cleanup after a failed migration.
8506 The cleanup is done by:
8507 - check that the instance is running only on one node
8508 (and update the config if needed)
8509 - change disks on its secondary node to secondary
8510 - wait until disks are fully synchronized
8511 - disconnect from the network
8512 - change disks into single-master mode
8513 - wait again until disks are fully synchronized
8516 instance = self.instance
8517 target_node = self.target_node
8518 source_node = self.source_node
8520 # check running on only one node
8521 self.feedback_fn("* checking where the instance actually runs"
8522 " (if this hangs, the hypervisor might be in"
8524 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8525 for node, result in ins_l.items():
8526 result.Raise("Can't contact node %s" % node)
8528 runningon_source = instance.name in ins_l[source_node].payload
8529 runningon_target = instance.name in ins_l[target_node].payload
8531 if runningon_source and runningon_target:
8532 raise errors.OpExecError("Instance seems to be running on two nodes,"
8533 " or the hypervisor is confused; you will have"
8534 " to ensure manually that it runs only on one"
8535 " and restart this operation")
8537 if not (runningon_source or runningon_target):
8538 raise errors.OpExecError("Instance does not seem to be running at all;"
8539 " in this case it's safer to repair by"
8540 " running 'gnt-instance stop' to ensure disk"
8541 " shutdown, and then restarting it")
8543 if runningon_target:
8544 # the migration has actually succeeded, we need to update the config
8545 self.feedback_fn("* instance running on secondary node (%s),"
8546 " updating config" % target_node)
8547 instance.primary_node = target_node
8548 self.cfg.Update(instance, self.feedback_fn)
8549 demoted_node = source_node
8551 self.feedback_fn("* instance confirmed to be running on its"
8552 " primary node (%s)" % source_node)
8553 demoted_node = target_node
8555 if instance.disk_template in constants.DTS_INT_MIRROR:
8556 self._EnsureSecondary(demoted_node)
8558 self._WaitUntilSync()
8559 except errors.OpExecError:
8560 # we ignore errors here, since if the device is standalone, it
8561 # won't be able to sync
8563 self._GoStandalone()
8564 self._GoReconnect(False)
8565 self._WaitUntilSync()
8567 self.feedback_fn("* done")
8569 def _RevertDiskStatus(self):
8570 """Try to revert the disk status after a failed migration.
8573 target_node = self.target_node
8574 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8578 self._EnsureSecondary(target_node)
8579 self._GoStandalone()
8580 self._GoReconnect(False)
8581 self._WaitUntilSync()
8582 except errors.OpExecError, err:
8583 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8584 " please try to recover the instance manually;"
8585 " error '%s'" % str(err))
8587 def _AbortMigration(self):
8588 """Call the hypervisor code to abort a started migration.
8591 instance = self.instance
8592 target_node = self.target_node
8593 source_node = self.source_node
8594 migration_info = self.migration_info
8596 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8600 abort_msg = abort_result.fail_msg
8602 logging.error("Aborting migration failed on target node %s: %s",
8603 target_node, abort_msg)
8604 # Don't raise an exception here, as we still have to try to revert the
8605 # disk status, even if this step failed.
8607 abort_result = self.rpc.call_instance_finalize_migration_src(
8608 source_node, instance, False, self.live)
8609 abort_msg = abort_result.fail_msg
8611 logging.error("Aborting migration failed on source node %s: %s",
8612 source_node, abort_msg)
8614 def _ExecMigration(self):
8615 """Migrate an instance.
8617 The migrate is done by:
8618 - change the disks into dual-master mode
8619 - wait until disks are fully synchronized again
8620 - migrate the instance
8621 - change disks on the new secondary node (the old primary) to secondary
8622 - wait until disks are fully synchronized
8623 - change disks into single-master mode
8626 instance = self.instance
8627 target_node = self.target_node
8628 source_node = self.source_node
8630 # Check for hypervisor version mismatch and warn the user.
8631 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8632 None, [self.instance.hypervisor])
8633 for ninfo in nodeinfo.values():
8634 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8636 (_, _, (src_info, )) = nodeinfo[source_node].payload
8637 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8639 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8640 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8641 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8642 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8643 if src_version != dst_version:
8644 self.feedback_fn("* warning: hypervisor version mismatch between"
8645 " source (%s) and target (%s) node" %
8646 (src_version, dst_version))
8648 self.feedback_fn("* checking disk consistency between source and target")
8649 for (idx, dev) in enumerate(instance.disks):
8650 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8651 raise errors.OpExecError("Disk %s is degraded or not fully"
8652 " synchronized on target node,"
8653 " aborting migration" % idx)
8655 if self.current_mem > self.tgt_free_mem:
8656 if not self.allow_runtime_changes:
8657 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8658 " free memory to fit instance %s on target"
8659 " node %s (have %dMB, need %dMB)" %
8660 (instance.name, target_node,
8661 self.tgt_free_mem, self.current_mem))
8662 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8663 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8666 rpcres.Raise("Cannot modify instance runtime memory")
8668 # First get the migration information from the remote node
8669 result = self.rpc.call_migration_info(source_node, instance)
8670 msg = result.fail_msg
8672 log_err = ("Failed fetching source migration information from %s: %s" %
8674 logging.error(log_err)
8675 raise errors.OpExecError(log_err)
8677 self.migration_info = migration_info = result.payload
8679 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8680 # Then switch the disks to master/master mode
8681 self._EnsureSecondary(target_node)
8682 self._GoStandalone()
8683 self._GoReconnect(True)
8684 self._WaitUntilSync()
8686 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8687 result = self.rpc.call_accept_instance(target_node,
8690 self.nodes_ip[target_node])
8692 msg = result.fail_msg
8694 logging.error("Instance pre-migration failed, trying to revert"
8695 " disk status: %s", msg)
8696 self.feedback_fn("Pre-migration failed, aborting")
8697 self._AbortMigration()
8698 self._RevertDiskStatus()
8699 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8700 (instance.name, msg))
8702 self.feedback_fn("* migrating instance to %s" % target_node)
8703 result = self.rpc.call_instance_migrate(source_node, instance,
8704 self.nodes_ip[target_node],
8706 msg = result.fail_msg
8708 logging.error("Instance migration failed, trying to revert"
8709 " disk status: %s", msg)
8710 self.feedback_fn("Migration failed, aborting")
8711 self._AbortMigration()
8712 self._RevertDiskStatus()
8713 raise errors.OpExecError("Could not migrate instance %s: %s" %
8714 (instance.name, msg))
8716 self.feedback_fn("* starting memory transfer")
8717 last_feedback = time.time()
8719 result = self.rpc.call_instance_get_migration_status(source_node,
8721 msg = result.fail_msg
8722 ms = result.payload # MigrationStatus instance
8723 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8724 logging.error("Instance migration failed, trying to revert"
8725 " disk status: %s", msg)
8726 self.feedback_fn("Migration failed, aborting")
8727 self._AbortMigration()
8728 self._RevertDiskStatus()
8730 msg = "hypervisor returned failure"
8731 raise errors.OpExecError("Could not migrate instance %s: %s" %
8732 (instance.name, msg))
8734 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8735 self.feedback_fn("* memory transfer complete")
8738 if (utils.TimeoutExpired(last_feedback,
8739 self._MIGRATION_FEEDBACK_INTERVAL) and
8740 ms.transferred_ram is not None):
8741 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8742 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8743 last_feedback = time.time()
8745 time.sleep(self._MIGRATION_POLL_INTERVAL)
8747 result = self.rpc.call_instance_finalize_migration_src(source_node,
8751 msg = result.fail_msg
8753 logging.error("Instance migration succeeded, but finalization failed"
8754 " on the source node: %s", msg)
8755 raise errors.OpExecError("Could not finalize instance migration: %s" %
8758 instance.primary_node = target_node
8760 # distribute new instance config to the other nodes
8761 self.cfg.Update(instance, self.feedback_fn)
8763 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8767 msg = result.fail_msg
8769 logging.error("Instance migration succeeded, but finalization failed"
8770 " on the target node: %s", msg)
8771 raise errors.OpExecError("Could not finalize instance migration: %s" %
8774 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8775 self._EnsureSecondary(source_node)
8776 self._WaitUntilSync()
8777 self._GoStandalone()
8778 self._GoReconnect(False)
8779 self._WaitUntilSync()
8781 # If the instance's disk template is `rbd' and there was a successful
8782 # migration, unmap the device from the source node.
8783 if self.instance.disk_template == constants.DT_RBD:
8784 disks = _ExpandCheckDisks(instance, instance.disks)
8785 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8787 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8788 msg = result.fail_msg
8790 logging.error("Migration was successful, but couldn't unmap the"
8791 " block device %s on source node %s: %s",
8792 disk.iv_name, source_node, msg)
8793 logging.error("You need to unmap the device %s manually on %s",
8794 disk.iv_name, source_node)
8796 self.feedback_fn("* done")
8798 def _ExecFailover(self):
8799 """Failover an instance.
8801 The failover is done by shutting it down on its present node and
8802 starting it on the secondary.
8805 instance = self.instance
8806 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8808 source_node = instance.primary_node
8809 target_node = self.target_node
8811 if instance.admin_state == constants.ADMINST_UP:
8812 self.feedback_fn("* checking disk consistency between source and target")
8813 for (idx, dev) in enumerate(instance.disks):
8814 # for drbd, these are drbd over lvm
8815 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8817 if primary_node.offline:
8818 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8820 (primary_node.name, idx, target_node))
8821 elif not self.ignore_consistency:
8822 raise errors.OpExecError("Disk %s is degraded on target node,"
8823 " aborting failover" % idx)
8825 self.feedback_fn("* not checking disk consistency as instance is not"
8828 self.feedback_fn("* shutting down instance on source node")
8829 logging.info("Shutting down instance %s on node %s",
8830 instance.name, source_node)
8832 result = self.rpc.call_instance_shutdown(source_node, instance,
8833 self.shutdown_timeout)
8834 msg = result.fail_msg
8836 if self.ignore_consistency or primary_node.offline:
8837 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8838 " proceeding anyway; please make sure node"
8839 " %s is down; error details: %s",
8840 instance.name, source_node, source_node, msg)
8842 raise errors.OpExecError("Could not shutdown instance %s on"
8844 (instance.name, source_node, msg))
8846 self.feedback_fn("* deactivating the instance's disks on source node")
8847 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8848 raise errors.OpExecError("Can't shut down the instance's disks")
8850 instance.primary_node = target_node
8851 # distribute new instance config to the other nodes
8852 self.cfg.Update(instance, self.feedback_fn)
8854 # Only start the instance if it's marked as up
8855 if instance.admin_state == constants.ADMINST_UP:
8856 self.feedback_fn("* activating the instance's disks on target node %s" %
8858 logging.info("Starting instance %s on node %s",
8859 instance.name, target_node)
8861 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8862 ignore_secondaries=True)
8864 _ShutdownInstanceDisks(self.lu, instance)
8865 raise errors.OpExecError("Can't activate the instance's disks")
8867 self.feedback_fn("* starting the instance on the target node %s" %
8869 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8871 msg = result.fail_msg
8873 _ShutdownInstanceDisks(self.lu, instance)
8874 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8875 (instance.name, target_node, msg))
8877 def Exec(self, feedback_fn):
8878 """Perform the migration.
8881 self.feedback_fn = feedback_fn
8882 self.source_node = self.instance.primary_node
8884 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8885 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8886 self.target_node = self.instance.secondary_nodes[0]
8887 # Otherwise self.target_node has been populated either
8888 # directly, or through an iallocator.
8890 self.all_nodes = [self.source_node, self.target_node]
8891 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8892 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8895 feedback_fn("Failover instance %s" % self.instance.name)
8896 self._ExecFailover()
8898 feedback_fn("Migrating instance %s" % self.instance.name)
8901 return self._ExecCleanup()
8903 return self._ExecMigration()
8906 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8908 """Wrapper around L{_CreateBlockDevInner}.
8910 This method annotates the root device first.
8913 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8914 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8918 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8920 """Create a tree of block devices on a given node.
8922 If this device type has to be created on secondaries, create it and
8925 If not, just recurse to children keeping the same 'force' value.
8927 @attention: The device has to be annotated already.
8929 @param lu: the lu on whose behalf we execute
8930 @param node: the node on which to create the device
8931 @type instance: L{objects.Instance}
8932 @param instance: the instance which owns the device
8933 @type device: L{objects.Disk}
8934 @param device: the device to create
8935 @type force_create: boolean
8936 @param force_create: whether to force creation of this device; this
8937 will be changed to True whenever we find a device which has
8938 the CreateOnSecondary() attribute
8939 @param info: the extra 'metadata' we should attach to the device
8940 (this will be represented as a LVM tag)
8941 @type force_open: boolean
8942 @param force_open: this parameter will be passed to the
8943 L{backend.BlockdevCreate} function where it specifies
8944 whether we run on primary or not, and it affects both
8945 the child assembly and the device's own Open() execution
8948 if device.CreateOnSecondary():
8952 for child in device.children:
8953 _CreateBlockDevInner(lu, node, instance, child, force_create,
8956 if not force_create:
8959 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8962 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8963 """Create a single block device on a given node.
8965 This will not recurse over children of the device, so they must be
8968 @param lu: the lu on whose behalf we execute
8969 @param node: the node on which to create the device
8970 @type instance: L{objects.Instance}
8971 @param instance: the instance which owns the device
8972 @type device: L{objects.Disk}
8973 @param device: the device to create
8974 @param info: the extra 'metadata' we should attach to the device
8975 (this will be represented as a LVM tag)
8976 @type force_open: boolean
8977 @param force_open: this parameter will be passed to the
8978 L{backend.BlockdevCreate} function where it specifies
8979 whether we run on primary or not, and it affects both
8980 the child assembly and the device's own Open() execution
8983 lu.cfg.SetDiskID(device, node)
8984 result = lu.rpc.call_blockdev_create(node, device, device.size,
8985 instance.name, force_open, info)
8986 result.Raise("Can't create block device %s on"
8987 " node %s for instance %s" % (device, node, instance.name))
8988 if device.physical_id is None:
8989 device.physical_id = result.payload
8992 def _GenerateUniqueNames(lu, exts):
8993 """Generate a suitable LV name.
8995 This will generate a logical volume name for the given instance.
9000 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9001 results.append("%s%s" % (new_id, val))
9005 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9006 iv_name, p_minor, s_minor):
9007 """Generate a drbd8 device complete with its children.
9010 assert len(vgnames) == len(names) == 2
9011 port = lu.cfg.AllocatePort()
9012 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9014 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9015 logical_id=(vgnames[0], names[0]),
9017 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9018 size=constants.DRBD_META_SIZE,
9019 logical_id=(vgnames[1], names[1]),
9021 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9022 logical_id=(primary, secondary, port,
9025 children=[dev_data, dev_meta],
9026 iv_name=iv_name, params={})
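# The resulting device tree for one DRBD8 disk, sketched informally: a drbd8
# device with logical_id
#   (primary, secondary, port, p_minor, s_minor, shared_secret)
# and two LV children, a data LV of the requested size and a metadata LV of
# constants.DRBD_META_SIZE.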
9030 _DISK_TEMPLATE_NAME_PREFIX = {
9031 constants.DT_PLAIN: "",
9032 constants.DT_RBD: ".rbd",
9036 _DISK_TEMPLATE_DEVICE_TYPE = {
9037 constants.DT_PLAIN: constants.LD_LV,
9038 constants.DT_FILE: constants.LD_FILE,
9039 constants.DT_SHARED_FILE: constants.LD_FILE,
9040 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9041 constants.DT_RBD: constants.LD_RBD,
9045 def _GenerateDiskTemplate(
9046 lu, template_name, instance_name, primary_node, secondary_nodes,
9047 disk_info, file_storage_dir, file_driver, base_index,
9048 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9049 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9050 """Generate the entire disk layout for a given template type.
9053 #TODO: compute space requirements
9055 vgname = lu.cfg.GetVGName()
9056 disk_count = len(disk_info)
9059 if template_name == constants.DT_DISKLESS:
9061 elif template_name == constants.DT_DRBD8:
9062 if len(secondary_nodes) != 1:
9063 raise errors.ProgrammerError("Wrong template configuration")
9064 remote_node = secondary_nodes[0]
9065 minors = lu.cfg.AllocateDRBDMinor(
9066 [primary_node, remote_node] * len(disk_info), instance_name)
9068 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9070 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9073 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9074 for i in range(disk_count)]):
9075 names.append(lv_prefix + "_data")
9076 names.append(lv_prefix + "_meta")
9077 for idx, disk in enumerate(disk_info):
9078 disk_index = idx + base_index
9079 data_vg = disk.get(constants.IDISK_VG, vgname)
9080 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9081 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9082 disk[constants.IDISK_SIZE],
9084 names[idx * 2:idx * 2 + 2],
9085 "disk/%d" % disk_index,
9086 minors[idx * 2], minors[idx * 2 + 1])
9087 disk_dev.mode = disk[constants.IDISK_MODE]
9088 disks.append(disk_dev)
9091 raise errors.ProgrammerError("Wrong template configuration")
9093 if template_name == constants.DT_FILE:
9095 elif template_name == constants.DT_SHARED_FILE:
9096 _req_shr_file_storage()
9098 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9099 if name_prefix is None:
9102 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9103 (name_prefix, base_index + i)
9104 for i in range(disk_count)])
9106 if template_name == constants.DT_PLAIN:
9108 def logical_id_fn(idx, _, disk):
9109 vg = disk.get(constants.IDISK_VG, vgname)
9110 return (vg, names[idx])
9112 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9114 lambda _, disk_index, disk: (file_driver,
9115 "%s/disk%d" % (file_storage_dir,
9117 elif template_name == constants.DT_BLOCK:
9119 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9120 disk[constants.IDISK_ADOPT])
9121 elif template_name == constants.DT_RBD:
9122 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9124 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9126 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9128 for idx, disk in enumerate(disk_info):
9129 disk_index = idx + base_index
9130 size = disk[constants.IDISK_SIZE]
9131 feedback_fn("* disk %s, size %s" %
9132 (disk_index, utils.FormatUnit(size, "h")))
9133 disks.append(objects.Disk(dev_type=dev_type, size=size,
9134 logical_id=logical_id_fn(idx, disk_index, disk),
9135 iv_name="disk/%d" % disk_index,
9136 mode=disk[constants.IDISK_MODE],
9142 def _GetInstanceInfoText(instance):
9143 """Compute that text that should be added to the disk's metadata.
9146 return "originstname+%s" % instance.name
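# Example (hypothetical instance name): for an instance named
# "web1.example.com" the returned text is "originstname+web1.example.com",
# which the disk creation code attaches to the devices (e.g. as an LVM tag).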
9149 def _CalcEta(time_taken, written, total_size):
9150 """Calculates the ETA based on size written and total size.
9152 @param time_taken: The time taken so far
9153 @param written: amount written so far
9154 @param total_size: The total size of data to be written
9155 @return: The remaining time in seconds
9158 avg_time = time_taken / float(written)
9159 return (total_size - written) * avg_time
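# Worked example with hypothetical numbers: after 30 seconds with 512 MiB of
# a 2048 MiB disk written, avg_time = 30 / 512.0 and the estimate is
# (2048 - 512) * (30 / 512.0) = 90 seconds remaining.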
9162 def _WipeDisks(lu, instance, disks=None):
9163 """Wipes instance disks.
9165 @type lu: L{LogicalUnit}
9166 @param lu: the logical unit on whose behalf we execute
9167 @type instance: L{objects.Instance}
9168 @param instance: the instance whose disks we should create
9169 @return: the success of the wipe
9172 node = instance.primary_node
9175 disks = [(idx, disk, 0)
9176 for (idx, disk) in enumerate(instance.disks)]
9178 for (_, device, _) in disks:
9179 lu.cfg.SetDiskID(device, node)
9181 logging.info("Pausing synchronization of disks of instance '%s'",
9183 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9184 (map(compat.snd, disks),
9187 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9189 for idx, success in enumerate(result.payload):
9191 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9192 " failed", idx, instance.name)
9195 for (idx, device, offset) in disks:
9196 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9197 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9199 int(min(constants.MAX_WIPE_CHUNK,
9200 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9204 start_time = time.time()
9209 info_text = (" (from %s to %s)" %
9210 (utils.FormatUnit(offset, "h"),
9211 utils.FormatUnit(size, "h")))
9213 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9215 logging.info("Wiping disk %d for instance %s on node %s using"
9216 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9218 while offset < size:
9219 wipe_size = min(wipe_chunk_size, size - offset)
9221 logging.debug("Wiping disk %d, offset %s, chunk %s",
9222 idx, offset, wipe_size)
9224 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9226 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9227 (idx, offset, wipe_size))
9231 if now - last_output >= 60:
9232 eta = _CalcEta(now - start_time, offset, size)
9233 lu.LogInfo(" - done: %.1f%% ETA: %s",
9234 offset / float(size) * 100, utils.FormatSeconds(eta))
9237 logging.info("Resuming synchronization of disks for instance '%s'",
9240 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9241 (map(compat.snd, disks),
9246 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9247 node, result.fail_msg)
9249 for idx, success in enumerate(result.payload):
9251 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9252 " failed", idx, instance.name)
9255 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9256 """Create all disks for an instance.
9258 This abstracts away some work from AddInstance.
9260 @type lu: L{LogicalUnit}
9261 @param lu: the logical unit on whose behalf we execute
9262 @type instance: L{objects.Instance}
9263 @param instance: the instance whose disks we should create
9265 @param to_skip: list of indices to skip
9266 @type target_node: string
9267 @param target_node: if passed, overrides the target node for creation
9269 @return: the success of the creation
9272 info = _GetInstanceInfoText(instance)
9273 if target_node is None:
9274 pnode = instance.primary_node
9275 all_nodes = instance.all_nodes
9280 if instance.disk_template in constants.DTS_FILEBASED:
9281 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9282 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9284 result.Raise("Failed to create directory '%s' on"
9285 " node %s" % (file_storage_dir, pnode))
9287 # Note: this needs to be kept in sync with adding of disks in
9288 # LUInstanceSetParams
9289 for idx, device in enumerate(instance.disks):
9290 if to_skip and idx in to_skip:
9292 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9294 for node in all_nodes:
9295 f_create = node == pnode
9296 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9299 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9300 """Remove all disks for an instance.
9302 This abstracts away some work from `AddInstance()` and
9303 `RemoveInstance()`. Note that in case some of the devices couldn't
9304 be removed, the removal will continue with the other ones (compare
9305 with `_CreateDisks()`).
9307 @type lu: L{LogicalUnit}
9308 @param lu: the logical unit on whose behalf we execute
9309 @type instance: L{objects.Instance}
9310 @param instance: the instance whose disks we should remove
9311 @type target_node: string
9312 @param target_node: used to override the node on which to remove the disks
9314 @return: the success of the removal
9317 logging.info("Removing block devices for instance %s", instance.name)
9320 ports_to_release = set()
9321 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9322 for (idx, device) in enumerate(anno_disks):
9324 edata = [(target_node, device)]
9326 edata = device.ComputeNodeTree(instance.primary_node)
9327 for node, disk in edata:
9328 lu.cfg.SetDiskID(disk, node)
9329 result = lu.rpc.call_blockdev_remove(node, disk)
9331 lu.LogWarning("Could not remove disk %s on node %s,"
9332 " continuing anyway: %s", idx, node, result.fail_msg)
9333 if not (result.offline and node != instance.primary_node):
9336 # if this is a DRBD disk, return its port to the pool
9337 if device.dev_type in constants.LDS_DRBD:
9338 ports_to_release.add(device.logical_id[2])
9340 if all_result or ignore_failures:
9341 for port in ports_to_release:
9342 lu.cfg.AddTcpUdpPort(port)
9344 if instance.disk_template in constants.DTS_FILEBASED:
9345 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9349 tgt = instance.primary_node
9350 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9352 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9353 file_storage_dir, instance.primary_node, result.fail_msg)
9359 def _ComputeDiskSizePerVG(disk_template, disks):
9360 """Compute disk size requirements in the volume group
9363 def _compute(disks, payload):
9364 """Universal algorithm.
9369 vgs[disk[constants.IDISK_VG]] = \
9370 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9374 # Required free disk space as a function of disk and swap space
9376 constants.DT_DISKLESS: {},
9377 constants.DT_PLAIN: _compute(disks, 0),
9378 # 128 MB are added for drbd metadata for each disk
9379 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9380 constants.DT_FILE: {},
9381 constants.DT_SHARED_FILE: {},
9384 if disk_template not in req_size_dict:
9385 raise errors.ProgrammerError("Disk template '%s' size requirement"
9386 " is unknown" % disk_template)
9388 return req_size_dict[disk_template]
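# Example (illustrative, hypothetical values): for two plain disks
#   [{IDISK_VG: "xenvg", IDISK_SIZE: 1024}, {IDISK_VG: "other", IDISK_SIZE: 512}]
# the result maps each volume group to the space it must provide, e.g.
#   {"xenvg": 1024, "other": 512}
# whereas DT_DRBD8 adds constants.DRBD_META_SIZE per disk on top of that.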
9391 def _FilterVmNodes(lu, nodenames):
9392 """Filters out non-vm_capable nodes from a list.
9394 @type lu: L{LogicalUnit}
9395 @param lu: the logical unit for which we check
9396 @type nodenames: list
9397 @param nodenames: the list of nodes on which we should check
9399 @return: the list of vm-capable nodes
9402 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9403 return [name for name in nodenames if name not in vm_nodes]
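# Usage sketch (illustrative): callers narrow their node list before issuing
# per-node validation RPCs, e.g.
#   nodenames = _FilterVmNodes(lu, [pnode.name] + secondaries)
# so that non-vm_capable nodes are never asked to validate instance data.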
9406 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9407 """Hypervisor parameter validation.
9409 This function abstracts the hypervisor parameter validation to be
9410 used in both instance create and instance modify.
9412 @type lu: L{LogicalUnit}
9413 @param lu: the logical unit for which we check
9414 @type nodenames: list
9415 @param nodenames: the list of nodes on which we should check
9416 @type hvname: string
9417 @param hvname: the name of the hypervisor we should use
9418 @type hvparams: dict
9419 @param hvparams: the parameters which we need to check
9420 @raise errors.OpPrereqError: if the parameters are not valid
9423 nodenames = _FilterVmNodes(lu, nodenames)
9425 cluster = lu.cfg.GetClusterInfo()
9426 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9428 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9429 for node in nodenames:
9433 info.Raise("Hypervisor parameter validation failed on node %s" % node)
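# Usage sketch (illustrative): both instance creation and modification call
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
# so the hypervisor parameters are validated on every node involved.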
9436 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9437 """OS parameters validation.
9439 @type lu: L{LogicalUnit}
9440 @param lu: the logical unit for which we check
9441 @type required: boolean
9442 @param required: whether the validation should fail if the OS is not
9444 @type nodenames: list
9445 @param nodenames: the list of nodes on which we should check
9446 @type osname: string
9447 @param osname: the name of the OS we should use
9448 @type osparams: dict
9449 @param osparams: the parameters which we need to check
9450 @raise errors.OpPrereqError: if the parameters are not valid
9453 nodenames = _FilterVmNodes(lu, nodenames)
9454 result = lu.rpc.call_os_validate(nodenames, required, osname,
9455 [constants.OS_VALIDATE_PARAMETERS],
9457 for node, nres in result.items():
9458 # we don't check for offline cases since this should be run only
9459 # against the master node and/or an instance's nodes
9460 nres.Raise("OS Parameters validation failed on node %s" % node)
9461 if not nres.payload:
9462 lu.LogInfo("OS %s not found on node %s, validation skipped",
9466 def _CreateInstanceAllocRequest(op, disks, nics, beparams):
9467 """Wrapper around IAReqInstanceAlloc.
9469 @param op: The instance opcode
9470 @param disks: The computed disks
9471 @param nics: The computed nics
9472 @param beparams: The fully filled beparams
9474 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9477 spindle_use = beparams[constants.BE_SPINDLE_USE]
9478 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9479 disk_template=op.disk_template,
9482 vcpus=beparams[constants.BE_VCPUS],
9483 memory=beparams[constants.BE_MAXMEM],
9484 spindle_use=spindle_use,
9486 nics=[n.ToDict() for n in nics],
9487 hypervisor=op.hypervisor)
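# Sketch of the expected call shape (illustrative; mirrors the multi-allocation
# path further below):
#   req = _CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
#                                     _ComputeNics(op, cluster, None, cfg, proc),
#                                     _ComputeFullBeParams(op, cluster))
#   ial = iallocator.IAllocator(cfg, rpc, req)
#   ial.Run(op.iallocator)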
9490 def _ComputeNics(op, cluster, default_ip, cfg, proc):
9491 """Computes the nics.
9493 @param op: The instance opcode
9494 @param cluster: Cluster configuration object
9495 @param default_ip: The default ip to assign
9496 @param cfg: An instance of the configuration object
9497 @param proc: The executor instance
9499 @returns: The built-up NICs
9504 nic_mode_req = nic.get(constants.INIC_MODE, None)
9505 nic_mode = nic_mode_req
9506 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9507 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9509 net = nic.get(constants.INIC_NETWORK, None)
9510 link = nic.get(constants.NIC_LINK, None)
9511 ip = nic.get(constants.INIC_IP, None)
9513 if net is None or net.lower() == constants.VALUE_NONE:
9516 if nic_mode_req is not None or link is not None:
9517 raise errors.OpPrereqError("If network is given, no mode or link"
9518 " is allowed to be passed",
9521 # ip validity checks
9522 if ip is None or ip.lower() == constants.VALUE_NONE:
9524 elif ip.lower() == constants.VALUE_AUTO:
9525 if not op.name_check:
9526 raise errors.OpPrereqError("IP address set to auto but name checks"
9527 " have been skipped",
9531 # We defer pool operations until later, so that the iallocator has
9532 # filled in the instance's node(s)
9533 if ip.lower() == constants.NIC_IP_POOL:
9535 raise errors.OpPrereqError("if ip=pool, parameter network"
9536 " must be passed too",
9539 elif not netutils.IPAddress.IsValid(ip):
9540 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9545 # TODO: check the ip address for uniqueness
9546 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9547 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9550 # MAC address verification
9551 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9552 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9553 mac = utils.NormalizeAndValidateMac(mac)
9556 # TODO: We need to factor this out
9557 cfg.ReserveMAC(mac, proc.GetECId())
9558 except errors.ReservationError:
9559 raise errors.OpPrereqError("MAC address %s already in use"
9560 " in cluster" % mac,
9561 errors.ECODE_NOTUNIQUE)
9563 # Build nic parameters
9566 nicparams[constants.NIC_MODE] = nic_mode
9568 nicparams[constants.NIC_LINK] = link
9570 check_params = cluster.SimpleFillNIC(nicparams)
9571 objects.NIC.CheckParameterSyntax(check_params)
9572 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9573 network=net, nicparams=nicparams))
9578 def _ComputeDisks(op, default_vg):
9579 """Computes the instance disks.
9581 @param op: The instance opcode
9582 @param default_vg: The default volume group to assume
9584 @return: The computed disks
9588 for disk in op.disks:
9589 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9590 if mode not in constants.DISK_ACCESS_SET:
9591 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9592 mode, errors.ECODE_INVAL)
9593 size = disk.get(constants.IDISK_SIZE, None)
9595 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9598 except (TypeError, ValueError):
9599 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9602 data_vg = disk.get(constants.IDISK_VG, default_vg)
9604 constants.IDISK_SIZE: size,
9605 constants.IDISK_MODE: mode,
9606 constants.IDISK_VG: data_vg,
9608 if constants.IDISK_METAVG in disk:
9609 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9610 if constants.IDISK_ADOPT in disk:
9611 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9612 disks.append(new_disk)
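# Example (illustrative, hypothetical values): an opcode disk specification of
#   {IDISK_SIZE: 10240, IDISK_MODE: "rw"}
# becomes, with default_vg = "xenvg",
#   {IDISK_SIZE: 10240, IDISK_MODE: "rw", IDISK_VG: "xenvg"}
# with IDISK_METAVG/IDISK_ADOPT copied through only when explicitly given.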
9617 def _ComputeFullBeParams(op, cluster):
9618 """Computes the full beparams.
9620 @param op: The instance opcode
9621 @param cluster: The cluster config object
9623 @return: The fully filled beparams
9626 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9627 for param, value in op.beparams.iteritems():
9628 if value == constants.VALUE_AUTO:
9629 op.beparams[param] = default_beparams[param]
9630 objects.UpgradeBeParams(op.beparams)
9631 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9632 return cluster.SimpleFillBE(op.beparams)
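# Example (illustrative): with op.beparams = {"vcpus": "auto", "maxmem": 512},
# the "auto" value is first replaced by the cluster default, the dict is then
# upgraded and type-checked, and the result is the cluster's default beparams
# with these per-instance overrides applied on top.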
9635 class LUInstanceCreate(LogicalUnit):
9636 """Create an instance.
9639 HPATH = "instance-add"
9640 HTYPE = constants.HTYPE_INSTANCE
9643 def CheckArguments(self):
9647 # do not require name_check to ease forward/backward compatibility
9649 if self.op.no_install and self.op.start:
9650 self.LogInfo("No-installation mode selected, disabling startup")
9651 self.op.start = False
9652 # validate/normalize the instance name
9653 self.op.instance_name = \
9654 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9656 if self.op.ip_check and not self.op.name_check:
9657 # TODO: make the ip check more flexible and not depend on the name check
9658 raise errors.OpPrereqError("Cannot do IP address check without a name"
9659 " check", errors.ECODE_INVAL)
9661 # check nics' parameter names
9662 for nic in self.op.nics:
9663 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9665 # check disks. parameter names and consistent adopt/no-adopt strategy
9666 has_adopt = has_no_adopt = False
9667 for disk in self.op.disks:
9668 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9669 if constants.IDISK_ADOPT in disk:
9673 if has_adopt and has_no_adopt:
9674 raise errors.OpPrereqError("Either all disks are adopted or none is",
9677 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9678 raise errors.OpPrereqError("Disk adoption is not supported for the"
9679 " '%s' disk template" %
9680 self.op.disk_template,
9682 if self.op.iallocator is not None:
9683 raise errors.OpPrereqError("Disk adoption not allowed with an"
9684 " iallocator script", errors.ECODE_INVAL)
9685 if self.op.mode == constants.INSTANCE_IMPORT:
9686 raise errors.OpPrereqError("Disk adoption not allowed for"
9687 " instance import", errors.ECODE_INVAL)
9689 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9690 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9691 " but no 'adopt' parameter given" %
9692 self.op.disk_template,
9695 self.adopt_disks = has_adopt
9697 # instance name verification
9698 if self.op.name_check:
9699 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9700 self.op.instance_name = self.hostname1.name
9701 # used in CheckPrereq for ip ping check
9702 self.check_ip = self.hostname1.ip
9704 self.check_ip = None
9706 # file storage checks
9707 if (self.op.file_driver and
9708 not self.op.file_driver in constants.FILE_DRIVER):
9709 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9710 self.op.file_driver, errors.ECODE_INVAL)
9712 if self.op.disk_template == constants.DT_FILE:
9713 opcodes.RequireFileStorage()
9714 elif self.op.disk_template == constants.DT_SHARED_FILE:
9715 opcodes.RequireSharedFileStorage()
9717 ### Node/iallocator related checks
9718 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9720 if self.op.pnode is not None:
9721 if self.op.disk_template in constants.DTS_INT_MIRROR:
9722 if self.op.snode is None:
9723 raise errors.OpPrereqError("The networked disk templates need"
9724 " a mirror node", errors.ECODE_INVAL)
9726 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9728 self.op.snode = None
9730 self._cds = _GetClusterDomainSecret()
9732 if self.op.mode == constants.INSTANCE_IMPORT:
9733 # On import force_variant must be True, because if we forced it at
9734 # initial install, our only chance when importing it back is that it
9736 self.op.force_variant = True
9738 if self.op.no_install:
9739 self.LogInfo("No-installation mode has no effect during import")
9741 elif self.op.mode == constants.INSTANCE_CREATE:
9742 if self.op.os_type is None:
9743 raise errors.OpPrereqError("No guest OS specified",
9745 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9746 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9747 " installation" % self.op.os_type,
9749 if self.op.disk_template is None:
9750 raise errors.OpPrereqError("No disk template specified",
9753 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9754 # Check handshake to ensure both clusters have the same domain secret
9755 src_handshake = self.op.source_handshake
9756 if not src_handshake:
9757 raise errors.OpPrereqError("Missing source handshake",
9760 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9763 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9766 # Load and check source CA
9767 self.source_x509_ca_pem = self.op.source_x509_ca
9768 if not self.source_x509_ca_pem:
9769 raise errors.OpPrereqError("Missing source X509 CA",
9773 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9775 except OpenSSL.crypto.Error, err:
9776 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9777 (err, ), errors.ECODE_INVAL)
9779 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9780 if errcode is not None:
9781 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9784 self.source_x509_ca = cert
9786 src_instance_name = self.op.source_instance_name
9787 if not src_instance_name:
9788 raise errors.OpPrereqError("Missing source instance name",
9791 self.source_instance_name = \
9792 netutils.GetHostname(name=src_instance_name).name
9795 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9796 self.op.mode, errors.ECODE_INVAL)
9798 def ExpandNames(self):
9799 """ExpandNames for CreateInstance.
9801 Figure out the right locks for instance creation.
9804 self.needed_locks = {}
9806 instance_name = self.op.instance_name
9807 # this is just a preventive check, but someone might still add this
9808 # instance in the meantime, and creation will fail at lock-add time
9809 if instance_name in self.cfg.GetInstanceList():
9810 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9811 instance_name, errors.ECODE_EXISTS)
9813 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9815 if self.op.iallocator:
9816 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9817 # specifying a group on instance creation and then selecting nodes from
9819 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9820 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9822 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9823 nodelist = [self.op.pnode]
9824 if self.op.snode is not None:
9825 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9826 nodelist.append(self.op.snode)
9827 self.needed_locks[locking.LEVEL_NODE] = nodelist
9828 # Lock resources of instance's primary and secondary nodes (copy to
9829 # prevent accidental modification)
9830 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9832 # in case of import lock the source node too
9833 if self.op.mode == constants.INSTANCE_IMPORT:
9834 src_node = self.op.src_node
9835 src_path = self.op.src_path
9837 if src_path is None:
9838 self.op.src_path = src_path = self.op.instance_name
9840 if src_node is None:
9841 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9842 self.op.src_node = None
9843 if os.path.isabs(src_path):
9844 raise errors.OpPrereqError("Importing an instance from a path"
9845 " requires a source node option",
9848 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9849 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9850 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9851 if not os.path.isabs(src_path):
9852 self.op.src_path = src_path = \
9853 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9855 def _RunAllocator(self):
9856 """Run the allocator based on input opcode.
9859 #TODO Export network to iallocator so that it chooses a pnode
9860 # in a nodegroup that has the desired network connected to
9861 req = _CreateInstanceAllocRequest(self.op, self.disks,
9862 self.nics, self.be_full)
9863 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9865 ial.Run(self.op.iallocator)
9868 raise errors.OpPrereqError("Can't compute nodes using"
9869 " iallocator '%s': %s" %
9870 (self.op.iallocator, ial.info),
9872 self.op.pnode = ial.result[0]
9873 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9874 self.op.instance_name, self.op.iallocator,
9875 utils.CommaJoin(ial.result))
9877 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9879 if req.RequiredNodes() == 2:
9880 self.op.snode = ial.result[1]
9882 def BuildHooksEnv(self):
9885 This runs on master, primary and secondary nodes of the instance.
9889 "ADD_MODE": self.op.mode,
9891 if self.op.mode == constants.INSTANCE_IMPORT:
9892 env["SRC_NODE"] = self.op.src_node
9893 env["SRC_PATH"] = self.op.src_path
9894 env["SRC_IMAGES"] = self.src_images
9896 env.update(_BuildInstanceHookEnv(
9897 name=self.op.instance_name,
9898 primary_node=self.op.pnode,
9899 secondary_nodes=self.secondaries,
9900 status=self.op.start,
9901 os_type=self.op.os_type,
9902 minmem=self.be_full[constants.BE_MINMEM],
9903 maxmem=self.be_full[constants.BE_MAXMEM],
9904 vcpus=self.be_full[constants.BE_VCPUS],
9905 nics=_NICListToTuple(self, self.nics),
9906 disk_template=self.op.disk_template,
9907 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9908 for d in self.disks],
9911 hypervisor_name=self.op.hypervisor,
9917 def BuildHooksNodes(self):
9918 """Build hooks nodes.
9921 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9924 def _ReadExportInfo(self):
9925 """Reads the export information from disk.
9927 It will override the opcode source node and path with the actual
9928 information, if these two were not specified before.
9930 @return: the export information
9933 assert self.op.mode == constants.INSTANCE_IMPORT
9935 src_node = self.op.src_node
9936 src_path = self.op.src_path
9938 if src_node is None:
9939 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9940 exp_list = self.rpc.call_export_list(locked_nodes)
9942 for node in exp_list:
9943 if exp_list[node].fail_msg:
9945 if src_path in exp_list[node].payload:
9947 self.op.src_node = src_node = node
9948 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
9952 raise errors.OpPrereqError("No export found for relative path %s" %
9953 src_path, errors.ECODE_INVAL)
9955 _CheckNodeOnline(self, src_node)
9956 result = self.rpc.call_export_info(src_node, src_path)
9957 result.Raise("No export or invalid export found in dir %s" % src_path)
9959 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9960 if not export_info.has_section(constants.INISECT_EXP):
9961 raise errors.ProgrammerError("Corrupted export config",
9962 errors.ECODE_ENVIRON)
9964 ei_version = export_info.get(constants.INISECT_EXP, "version")
9965 if (int(ei_version) != constants.EXPORT_VERSION):
9966 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9967 (ei_version, constants.EXPORT_VERSION),
9968 errors.ECODE_ENVIRON)
9971 def _ReadExportParams(self, einfo):
9972 """Use export parameters as defaults.
9974 In case the opcode does not override some of the instance
9975 parameters, then try to use them from the export information, if
9979 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9981 if self.op.disk_template is None:
9982 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9983 self.op.disk_template = einfo.get(constants.INISECT_INS,
9985 if self.op.disk_template not in constants.DISK_TEMPLATES:
9986 raise errors.OpPrereqError("Disk template specified in configuration"
9987 " file is not one of the allowed values:"
9989 " ".join(constants.DISK_TEMPLATES),
9992 raise errors.OpPrereqError("No disk template specified and the export"
9993 " is missing the disk_template information",
9996 if not self.op.disks:
9998 # TODO: import the disk iv_name too
9999 for idx in range(constants.MAX_DISKS):
10000 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10001 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10002 disks.append({constants.IDISK_SIZE: disk_sz})
10003 self.op.disks = disks
10004 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10005 raise errors.OpPrereqError("No disk info specified and the export"
10006 " is missing the disk information",
10007 errors.ECODE_INVAL)
10009 if not self.op.nics:
10011 for idx in range(constants.MAX_NICS):
10012 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10014 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10015 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10020 self.op.nics = nics
10022 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10023 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10025 if (self.op.hypervisor is None and
10026 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10027 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10029 if einfo.has_section(constants.INISECT_HYP):
10030 # use the export parameters but do not override the ones
10031 # specified by the user
10032 for name, value in einfo.items(constants.INISECT_HYP):
10033 if name not in self.op.hvparams:
10034 self.op.hvparams[name] = value
10036 if einfo.has_section(constants.INISECT_BEP):
10037 # use the parameters, without overriding
10038 for name, value in einfo.items(constants.INISECT_BEP):
10039 if name not in self.op.beparams:
10040 self.op.beparams[name] = value
10041 # Compatibility for the old "memory" be param
10042 if name == constants.BE_MEMORY:
10043 if constants.BE_MAXMEM not in self.op.beparams:
10044 self.op.beparams[constants.BE_MAXMEM] = value
10045 if constants.BE_MINMEM not in self.op.beparams:
10046 self.op.beparams[constants.BE_MINMEM] = value
10048 # try to read the parameters old style, from the main section
10049 for name in constants.BES_PARAMETERS:
10050 if (name not in self.op.beparams and
10051 einfo.has_option(constants.INISECT_INS, name)):
10052 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10054 if einfo.has_section(constants.INISECT_OSP):
10055 # use the parameters, without overriding
10056 for name, value in einfo.items(constants.INISECT_OSP):
10057 if name not in self.op.osparams:
10058 self.op.osparams[name] = value
10060 def _RevertToDefaults(self, cluster):
10061 """Revert the instance parameters to the default values.
10065 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10066 for name in self.op.hvparams.keys():
10067 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10068 del self.op.hvparams[name]
10070 be_defs = cluster.SimpleFillBE({})
10071 for name in self.op.beparams.keys():
10072 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10073 del self.op.beparams[name]
10075 nic_defs = cluster.SimpleFillNIC({})
10076 for nic in self.op.nics:
10077 for name in constants.NICS_PARAMETERS:
10078 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10081 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10082 for name in self.op.osparams.keys():
10083 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10084 del self.op.osparams[name]
10086 def _CalculateFileStorageDir(self):
10087 """Calculate final instance file storage dir.
10090 # file storage dir calculation/check
10091 self.instance_file_storage_dir = None
10092 if self.op.disk_template in constants.DTS_FILEBASED:
10093 # build the full file storage dir path
10096 if self.op.disk_template == constants.DT_SHARED_FILE:
10097 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10099 get_fsd_fn = self.cfg.GetFileStorageDir
10101 cfg_storagedir = get_fsd_fn()
10102 if not cfg_storagedir:
10103 raise errors.OpPrereqError("Cluster file storage dir not defined",
10104 errors.ECODE_STATE)
10105 joinargs.append(cfg_storagedir)
10107 if self.op.file_storage_dir is not None:
10108 joinargs.append(self.op.file_storage_dir)
10110 joinargs.append(self.op.instance_name)
10112 # pylint: disable=W0142
10113 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10115 def CheckPrereq(self): # pylint: disable=R0914
10116 """Check prerequisites.
10119 self._CalculateFileStorageDir()
10121 if self.op.mode == constants.INSTANCE_IMPORT:
10122 export_info = self._ReadExportInfo()
10123 self._ReadExportParams(export_info)
10124 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10126 self._old_instance_name = None
10128 if (not self.cfg.GetVGName() and
10129 self.op.disk_template not in constants.DTS_NOT_LVM):
10130 raise errors.OpPrereqError("Cluster does not support lvm-based"
10131 " instances", errors.ECODE_STATE)
10133 if (self.op.hypervisor is None or
10134 self.op.hypervisor == constants.VALUE_AUTO):
10135 self.op.hypervisor = self.cfg.GetHypervisorType()
10137 cluster = self.cfg.GetClusterInfo()
10138 enabled_hvs = cluster.enabled_hypervisors
10139 if self.op.hypervisor not in enabled_hvs:
10140 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10142 (self.op.hypervisor, ",".join(enabled_hvs)),
10143 errors.ECODE_STATE)
10145 # Check tag validity
10146 for tag in self.op.tags:
10147 objects.TaggableObject.ValidateTag(tag)
10149 # check hypervisor parameter syntax (locally)
10150 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10151 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10153 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10154 hv_type.CheckParameterSyntax(filled_hvp)
10155 self.hv_full = filled_hvp
10156 # check that we don't specify global parameters on an instance
10157 _CheckGlobalHvParams(self.op.hvparams)
10159 # fill and remember the beparams dict
10160 self.be_full = _ComputeFullBeParams(self.op, cluster)
10162 # build os parameters
10163 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10165 # now that hvp/bep are in final format, let's reset to defaults,
10167 if self.op.identify_defaults:
10168 self._RevertToDefaults(cluster)
10171 self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
10174 # disk checks/pre-build
10175 default_vg = self.cfg.GetVGName()
10176 self.disks = _ComputeDisks(self.op, default_vg)
10178 if self.op.mode == constants.INSTANCE_IMPORT:
10180 for idx in range(len(self.disks)):
10181 option = "disk%d_dump" % idx
10182 if export_info.has_option(constants.INISECT_INS, option):
10183 # FIXME: are the old os-es, disk sizes, etc. useful?
10184 export_name = export_info.get(constants.INISECT_INS, option)
10185 image = utils.PathJoin(self.op.src_path, export_name)
10186 disk_images.append(image)
10188 disk_images.append(False)
10190 self.src_images = disk_images
10192 if self.op.instance_name == self._old_instance_name:
10193 for idx, nic in enumerate(self.nics):
10194 if nic.mac == constants.VALUE_AUTO:
10195 nic_mac_ini = "nic%d_mac" % idx
10196 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10198 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10200 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10201 if self.op.ip_check:
10202 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10203 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10204 (self.check_ip, self.op.instance_name),
10205 errors.ECODE_NOTUNIQUE)
10207 #### mac address generation
10208 # By generating here the mac address both the allocator and the hooks get
10209 # the real final mac address rather than the 'auto' or 'generate' value.
10210 # There is a race condition between the generation and the instance object
10211 # creation, which means that we know the mac is valid now, but we're not
10212 # sure it will be when we actually add the instance. If things go bad
10213 # adding the instance will abort because of a duplicate mac, and the
10214 # creation job will fail.
10215 for nic in self.nics:
10216 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10217 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10221 if self.op.iallocator is not None:
10222 self._RunAllocator()
10224 # Release all unneeded node locks
10225 _ReleaseLocks(self, locking.LEVEL_NODE,
10226 keep=filter(None, [self.op.pnode, self.op.snode,
10227 self.op.src_node]))
10228 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10229 keep=filter(None, [self.op.pnode, self.op.snode,
10230 self.op.src_node]))
10232 #### node related checks
10234 # check primary node
10235 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10236 assert self.pnode is not None, \
10237 "Cannot retrieve locked node %s" % self.op.pnode
10239 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10240 pnode.name, errors.ECODE_STATE)
10242 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10243 pnode.name, errors.ECODE_STATE)
10244 if not pnode.vm_capable:
10245 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10246 " '%s'" % pnode.name, errors.ECODE_STATE)
10248 self.secondaries = []
10250 # Fill in any IPs from IP pools. This must happen here, because we need to
10251 # know the nic's primary node, as specified by the iallocator
10252 for idx, nic in enumerate(self.nics):
10254 if net is not None:
10255 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10256 if netparams is None:
10257 raise errors.OpPrereqError("No netparams found for network"
10258 " %s. Probably not connected to"
10259 " the node group of node %s" %
10260 (net, self.pnode.name),
10261 errors.ECODE_INVAL)
10262 self.LogInfo("NIC/%d inherits netparams %s" %
10263 (idx, netparams.values()))
10264 nic.nicparams = dict(netparams)
10265 if nic.ip is not None:
10266 if nic.ip.lower() == constants.NIC_IP_POOL:
10268 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10269 except errors.ReservationError:
10270 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10271 " from the address pool" % idx,
10272 errors.ECODE_STATE)
10273 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10276 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10277 except errors.ReservationError:
10278 raise errors.OpPrereqError("IP address %s already in use"
10279 " or does not belong to network %s" %
10281 errors.ECODE_NOTUNIQUE)
10283 # net is None; the IP is either None or explicitly given
10284 if self.op.conflicts_check:
10285 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10287 # mirror node verification
10288 if self.op.disk_template in constants.DTS_INT_MIRROR:
10289 if self.op.snode == pnode.name:
10290 raise errors.OpPrereqError("The secondary node cannot be the"
10291 " primary node", errors.ECODE_INVAL)
10292 _CheckNodeOnline(self, self.op.snode)
10293 _CheckNodeNotDrained(self, self.op.snode)
10294 _CheckNodeVmCapable(self, self.op.snode)
10295 self.secondaries.append(self.op.snode)
10297 snode = self.cfg.GetNodeInfo(self.op.snode)
10298 if pnode.group != snode.group:
10299 self.LogWarning("The primary and secondary nodes are in two"
10300 " different node groups; the disk parameters"
10301 " from the first disk's node group will be"
10304 nodenames = [pnode.name] + self.secondaries
10306 # Verify instance specs
10307 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10309 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10310 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10311 constants.ISPEC_DISK_COUNT: len(self.disks),
10312 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10313 constants.ISPEC_NIC_COUNT: len(self.nics),
10314 constants.ISPEC_SPINDLE_USE: spindle_use,
10317 group_info = self.cfg.GetNodeGroup(pnode.group)
10318 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10319 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10320 if not self.op.ignore_ipolicy and res:
10321 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10322 (pnode.group, group_info.name, utils.CommaJoin(res)))
10323 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10325 if not self.adopt_disks:
10326 if self.op.disk_template == constants.DT_RBD:
10327 # _CheckRADOSFreeSpace() is just a placeholder.
10328 # Any function that checks prerequisites can be placed here.
10329 # Check if there is enough space on the RADOS cluster.
10330 _CheckRADOSFreeSpace()
10332 # Check lv size requirements, if not adopting
10333 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10334 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10336 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10337 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10338 disk[constants.IDISK_ADOPT])
10339 for disk in self.disks])
10340 if len(all_lvs) != len(self.disks):
10341 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10342 errors.ECODE_INVAL)
10343 for lv_name in all_lvs:
10345 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10346 # to ReserveLV uses the same syntax
10347 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10348 except errors.ReservationError:
10349 raise errors.OpPrereqError("LV named %s used by another instance" %
10350 lv_name, errors.ECODE_NOTUNIQUE)
10352 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10353 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10355 node_lvs = self.rpc.call_lv_list([pnode.name],
10356 vg_names.payload.keys())[pnode.name]
10357 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10358 node_lvs = node_lvs.payload
10360 delta = all_lvs.difference(node_lvs.keys())
10362 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10363 utils.CommaJoin(delta),
10364 errors.ECODE_INVAL)
10365 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10367 raise errors.OpPrereqError("Online logical volumes found, cannot"
10368 " adopt: %s" % utils.CommaJoin(online_lvs),
10369 errors.ECODE_STATE)
10370 # update the size of disk based on what is found
10371 for dsk in self.disks:
10372 dsk[constants.IDISK_SIZE] = \
10373 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10374 dsk[constants.IDISK_ADOPT])][0]))
10376 elif self.op.disk_template == constants.DT_BLOCK:
10377 # Normalize and de-duplicate device paths
10378 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10379 for disk in self.disks])
10380 if len(all_disks) != len(self.disks):
10381 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10382 errors.ECODE_INVAL)
10383 baddisks = [d for d in all_disks
10384 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10386 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10387 " cannot be adopted" %
10388 (utils.CommaJoin(baddisks),
10389 constants.ADOPTABLE_BLOCKDEV_ROOT),
10390 errors.ECODE_INVAL)
10392 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10393 list(all_disks))[pnode.name]
10394 node_disks.Raise("Cannot get block device information from node %s" %
10396 node_disks = node_disks.payload
10397 delta = all_disks.difference(node_disks.keys())
10399 raise errors.OpPrereqError("Missing block device(s): %s" %
10400 utils.CommaJoin(delta),
10401 errors.ECODE_INVAL)
10402 for dsk in self.disks:
10403 dsk[constants.IDISK_SIZE] = \
10404 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10406 # Verify instance specs
10407 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10409 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10410 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10411 constants.ISPEC_DISK_COUNT: len(self.disks),
10412 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10413 for disk in self.disks],
10414 constants.ISPEC_NIC_COUNT: len(self.nics),
10415 constants.ISPEC_SPINDLE_USE: spindle_use,
10418 group_info = self.cfg.GetNodeGroup(pnode.group)
10419 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10420 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10421 if not self.op.ignore_ipolicy and res:
10422 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10423 " policy: %s") % (pnode.group,
10424 utils.CommaJoin(res)),
10425 errors.ECODE_INVAL)
10427 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10429 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10430 # check OS parameters (remotely)
10431 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10433 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10435 # memory check on primary node
10436 #TODO(dynmem): use MINMEM for checking
10438 _CheckNodeFreeMemory(self, self.pnode.name,
10439 "creating instance %s" % self.op.instance_name,
10440 self.be_full[constants.BE_MAXMEM],
10441 self.op.hypervisor)
10443 self.dry_run_result = list(nodenames)
10445 def Exec(self, feedback_fn):
10446 """Create and add the instance to the cluster.
10449 instance = self.op.instance_name
10450 pnode_name = self.pnode.name
10452 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10453 self.owned_locks(locking.LEVEL_NODE)), \
10454 "Node locks differ from node resource locks"
10456 ht_kind = self.op.hypervisor
10457 if ht_kind in constants.HTS_REQ_PORT:
10458 network_port = self.cfg.AllocatePort()
10460 network_port = None
10462 # This is ugly, but we have a chicken-and-egg problem here
10463 # We can only take the group disk parameters, as the instance
10464 # has no disks yet (we are generating them right here).
10465 node = self.cfg.GetNodeInfo(pnode_name)
10466 nodegroup = self.cfg.GetNodeGroup(node.group)
10467 disks = _GenerateDiskTemplate(self,
10468 self.op.disk_template,
10469 instance, pnode_name,
10472 self.instance_file_storage_dir,
10473 self.op.file_driver,
10476 self.cfg.GetGroupDiskParams(nodegroup))
10478 iobj = objects.Instance(name=instance, os=self.op.os_type,
10479 primary_node=pnode_name,
10480 nics=self.nics, disks=disks,
10481 disk_template=self.op.disk_template,
10482 admin_state=constants.ADMINST_DOWN,
10483 network_port=network_port,
10484 beparams=self.op.beparams,
10485 hvparams=self.op.hvparams,
10486 hypervisor=self.op.hypervisor,
10487 osparams=self.op.osparams,
10491 for tag in self.op.tags:
10494 if self.adopt_disks:
10495 if self.op.disk_template == constants.DT_PLAIN:
10496 # rename LVs to the newly-generated names; we need to construct
10497 # 'fake' LV disks with the old data, plus the new unique_id
10498 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10500 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10501 rename_to.append(t_dsk.logical_id)
10502 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10503 self.cfg.SetDiskID(t_dsk, pnode_name)
10504 result = self.rpc.call_blockdev_rename(pnode_name,
10505 zip(tmp_disks, rename_to))
10506 result.Raise("Failed to rename adopted LVs")
10508 feedback_fn("* creating instance disks...")
10510 _CreateDisks(self, iobj)
10511 except errors.OpExecError:
10512 self.LogWarning("Device creation failed, reverting...")
10514 _RemoveDisks(self, iobj)
10516 self.cfg.ReleaseDRBDMinors(instance)
10519 feedback_fn("adding instance %s to cluster config" % instance)
10521 self.cfg.AddInstance(iobj, self.proc.GetECId())
10523 # Declare that we don't want to remove the instance lock anymore, as we've
10524 # added the instance to the config
10525 del self.remove_locks[locking.LEVEL_INSTANCE]
10527 if self.op.mode == constants.INSTANCE_IMPORT:
10528 # Release unused nodes
10529 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10531 # Release all nodes
10532 _ReleaseLocks(self, locking.LEVEL_NODE)
10535 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10536 feedback_fn("* wiping instance disks...")
10538 _WipeDisks(self, iobj)
10539 except errors.OpExecError, err:
10540 logging.exception("Wiping disks failed")
10541 self.LogWarning("Wiping instance disks failed (%s)", err)
10545 # Something is already wrong with the disks, don't do anything else
10547 elif self.op.wait_for_sync:
10548 disk_abort = not _WaitForSync(self, iobj)
10549 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10550 # make sure the disks are not degraded (still sync-ing is ok)
10551 feedback_fn("* checking mirrors status")
10552 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10557 _RemoveDisks(self, iobj)
10558 self.cfg.RemoveInstance(iobj.name)
10559 # Make sure the instance lock gets removed
10560 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10561 raise errors.OpExecError("There are some degraded disks for"
10564 # Release all node resource locks
10565 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10567 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10568 # we need to set the disks ID to the primary node, since the
10569 # preceding code might or might not have done it, depending on
10570 # disk template and other options
10571 for disk in iobj.disks:
10572 self.cfg.SetDiskID(disk, pnode_name)
10573 if self.op.mode == constants.INSTANCE_CREATE:
10574 if not self.op.no_install:
10575 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10576 not self.op.wait_for_sync)
10578 feedback_fn("* pausing disk sync to install instance OS")
10579 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10582 for idx, success in enumerate(result.payload):
10584 logging.warn("pause-sync of instance %s for disk %d failed",
10587 feedback_fn("* running the instance OS create scripts...")
10588 # FIXME: pass debug option from opcode to backend
10590 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10591 self.op.debug_level)
10593 feedback_fn("* resuming disk sync")
10594 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10597 for idx, success in enumerate(result.payload):
10599 logging.warn("resume-sync of instance %s for disk %d failed",
10602 os_add_result.Raise("Could not add os for instance %s"
10603 " on node %s" % (instance, pnode_name))
10606 if self.op.mode == constants.INSTANCE_IMPORT:
10607 feedback_fn("* running the instance OS import scripts...")
10611 for idx, image in enumerate(self.src_images):
10615 # FIXME: pass debug option from opcode to backend
10616 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10617 constants.IEIO_FILE, (image, ),
10618 constants.IEIO_SCRIPT,
10619 (iobj.disks[idx], idx),
10621 transfers.append(dt)
10624 masterd.instance.TransferInstanceData(self, feedback_fn,
10625 self.op.src_node, pnode_name,
10626 self.pnode.secondary_ip,
10628 if not compat.all(import_result):
10629 self.LogWarning("Some disks for instance %s on node %s were not"
10630 " imported successfully" % (instance, pnode_name))
10632 rename_from = self._old_instance_name
10634 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10635 feedback_fn("* preparing remote import...")
10636 # The source cluster will stop the instance before attempting to make
10637 # a connection. In some cases stopping an instance can take a long
10638 # time, hence the shutdown timeout is added to the connection
10640 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10641 self.op.source_shutdown_timeout)
10642 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10644 assert iobj.primary_node == self.pnode.name
10646 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10647 self.source_x509_ca,
10648 self._cds, timeouts)
10649 if not compat.all(disk_results):
10650 # TODO: Should the instance still be started, even if some disks
10651 # failed to import (valid for local imports, too)?
10652 self.LogWarning("Some disks for instance %s on node %s were not"
10653 " imported successfully" % (instance, pnode_name))
10655 rename_from = self.source_instance_name
10658 # also checked in the prereq part
10659 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10662 # Run rename script on newly imported instance
10663 assert iobj.name == instance
10664 feedback_fn("Running rename script for %s" % instance)
10665 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10667 self.op.debug_level)
10668 if result.fail_msg:
10669 self.LogWarning("Failed to run rename script for %s on node"
10670 " %s: %s" % (instance, pnode_name, result.fail_msg))
10672 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10675 iobj.admin_state = constants.ADMINST_UP
10676 self.cfg.Update(iobj, feedback_fn)
10677 logging.info("Starting instance %s on node %s", instance, pnode_name)
10678 feedback_fn("* starting instance...")
10679 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10681 result.Raise("Could not start instance")
10683 return list(iobj.all_nodes)
10686 class LUInstanceMultiAlloc(NoHooksLU):
10687 """Allocates multiple instances at the same time.
10692 def CheckArguments(self):
10693 """Check arguments.
10697 for inst in self.op.instances:
10698 if inst.iallocator is not None:
10699 raise errors.OpPrereqError("iallocator is not allowed to be set on"
10700 " instance objects", errors.ECODE_INVAL)
10701 nodes.append(bool(inst.pnode))
10702 if inst.disk_template in constants.DTS_INT_MIRROR:
10703 nodes.append(bool(inst.snode))
10705 has_nodes = compat.any(nodes)
10706 if compat.all(nodes) ^ has_nodes:
10707 raise errors.OpPrereqError("There are instance objects providing"
10708 " pnode/snode while others do not",
10709 errors.ECODE_INVAL)
10711 if self.op.iallocator is None:
10712 default_iallocator = self.cfg.GetDefaultIAllocator()
10713 if default_iallocator and has_nodes:
10714 self.op.iallocator = default_iallocator
10716 raise errors.OpPrereqError("No iallocator or nodes on the instances"
10717 " given and no cluster-wide default"
10718 " iallocator found; please specify either"
10719 " an iallocator or nodes on the instances"
10720 " or set a cluster-wide default iallocator",
10721 errors.ECODE_INVAL)
10723 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10725 raise errors.OpPrereqError("There are duplicate instance names: %s" %
10726 utils.CommaJoin(dups), errors.ECODE_INVAL)
10728 def ExpandNames(self):
10729 """Calculate the locks.
10732 self.share_locks = _ShareAll()
10733 self.needed_locks = {}
10735 if self.op.iallocator:
10736 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10737 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10740 for inst in self.op.instances:
10741 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10742 nodeslist.append(inst.pnode)
10743 if inst.snode is not None:
10744 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10745 nodeslist.append(inst.snode)
10747 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10748 # Lock resources of instance's primary and secondary nodes (copy to
10749 # prevent accidental modification)
10750 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10752 def CheckPrereq(self):
10753 """Check prerequisite.
10756 cluster = self.cfg.GetClusterInfo()
10757 default_vg = self.cfg.GetVGName()
10758 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10759 _ComputeNics(op, cluster, None,
10760 self.cfg, self.proc),
10761 _ComputeFullBeParams(op, cluster))
10762 for op in self.op.instances]
10763 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10764 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10766 ial.Run(self.op.iallocator)
10768 if not ial.success:
10769 raise errors.OpPrereqError("Can't compute nodes using"
10770 " iallocator '%s': %s" %
10771 (self.op.iallocator, ial.info),
10772 errors.ECODE_NORES)
10774 self.ia_result = ial.result
10776 if self.op.dry_run:
10777 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
10778 constants.JOB_IDS_KEY: [],
10781 def _ConstructPartialResult(self):
10782 """Constructs the partial result.
10785 (allocatable, failed) = self.ia_result
10787 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10788 map(compat.fst, allocatable),
10789 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10792 def Exec(self, feedback_fn):
10793 """Executes the opcode.
10796 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10797 (allocatable, failed) = self.ia_result
10800 for (name, nodes) in allocatable:
10801 op = op2inst.pop(name)
10804 (op.pnode, op.snode) = nodes
10806 (op.pnode,) = nodes
10810 missing = set(op2inst.keys()) - set(failed)
10811 assert not missing, \
10812 "Iallocator returned an incomplete result: %s" % utils.CommaJoin(missing)
10814 return ResultWithJobs(jobs, **self._ConstructPartialResult())
10817 def _CheckRADOSFreeSpace():
10818 """Compute disk size requirements inside the RADOS cluster.
10821 # For the RADOS cluster we assume there is always enough space.
10825 class LUInstanceConsole(NoHooksLU):
10826 """Connect to an instance's console.
10828 This is somewhat special in that it returns the command line that
10829 you need to run on the master node in order to connect to the
10835 def ExpandNames(self):
10836 self.share_locks = _ShareAll()
10837 self._ExpandAndLockInstance()
10839 def CheckPrereq(self):
10840 """Check prerequisites.
10842 This checks that the instance is in the cluster.
10845 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10846 assert self.instance is not None, \
10847 "Cannot retrieve locked instance %s" % self.op.instance_name
10848 _CheckNodeOnline(self, self.instance.primary_node)
10850 def Exec(self, feedback_fn):
10851 """Connect to the console of an instance
10854 instance = self.instance
10855 node = instance.primary_node
10857 node_insts = self.rpc.call_instance_list([node],
10858 [instance.hypervisor])[node]
10859 node_insts.Raise("Can't get node information from %s" % node)
10861 if instance.name not in node_insts.payload:
10862 if instance.admin_state == constants.ADMINST_UP:
10863 state = constants.INSTST_ERRORDOWN
10864 elif instance.admin_state == constants.ADMINST_DOWN:
10865 state = constants.INSTST_ADMINDOWN
10867 state = constants.INSTST_ADMINOFFLINE
10868 raise errors.OpExecError("Instance %s is not running (state %s)" %
10869 (instance.name, state))
10871 logging.debug("Connecting to console of %s on %s", instance.name, node)
10873 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10876 def _GetInstanceConsole(cluster, instance):
10877 """Returns console information for an instance.
10879 @type cluster: L{objects.Cluster}
10880 @type instance: L{objects.Instance}
10884 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10885 # beparams and hvparams are passed separately, to avoid editing the
10886 # instance and then saving the defaults in the instance itself.
10887 hvparams = cluster.FillHV(instance)
10888 beparams = cluster.FillBE(instance)
10889 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10891 assert console.instance == instance.name
10892 assert console.Validate()
10894 return console.ToDict()
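# Sketch of the expected return shape (illustrative; the exact keys come from
# objects.InstanceConsole): a dict such as
#   {"instance": "inst1.example.com", "kind": "ssh", "host": "node1", ...}
# which clients turn into the command to run on the master node.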
10897 class LUInstanceReplaceDisks(LogicalUnit):
10898 """Replace the disks of an instance.
10901 HPATH = "mirrors-replace"
10902 HTYPE = constants.HTYPE_INSTANCE
10905 def CheckArguments(self):
10906 """Check arguments.
10909 remote_node = self.op.remote_node
10910 ialloc = self.op.iallocator
10911 if self.op.mode == constants.REPLACE_DISK_CHG:
10912 if remote_node is None and ialloc is None:
10913 raise errors.OpPrereqError("When changing the secondary either an"
10914 " iallocator script must be used or the"
10915 " new node given", errors.ECODE_INVAL)
10917 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10919 elif remote_node is not None or ialloc is not None:
10920 # Not replacing the secondary
10921 raise errors.OpPrereqError("The iallocator and new node options can"
10922 " only be used when changing the"
10923 " secondary node", errors.ECODE_INVAL)
10925 def ExpandNames(self):
10926 self._ExpandAndLockInstance()
10928 assert locking.LEVEL_NODE not in self.needed_locks
10929 assert locking.LEVEL_NODE_RES not in self.needed_locks
10930 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10932 assert self.op.iallocator is None or self.op.remote_node is None, \
10933 "Conflicting options"
10935 if self.op.remote_node is not None:
10936 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10938 # Warning: do not remove the locking of the new secondary here
10939 # unless DRBD8.AddChildren is changed to work in parallel;
10940 # currently it doesn't since parallel invocations of
10941 # FindUnusedMinor will conflict
10942 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10943 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10945 self.needed_locks[locking.LEVEL_NODE] = []
10946 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10948 if self.op.iallocator is not None:
10949 # iallocator will select a new node in the same group
10950 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10952 self.needed_locks[locking.LEVEL_NODE_RES] = []
10954 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10955 self.op.iallocator, self.op.remote_node,
10956 self.op.disks, self.op.early_release,
10957 self.op.ignore_ipolicy)
10959 self.tasklets = [self.replacer]
10961 def DeclareLocks(self, level):
10962 if level == locking.LEVEL_NODEGROUP:
10963 assert self.op.remote_node is None
10964 assert self.op.iallocator is not None
10965 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10967 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10968 # Lock all groups used by instance optimistically; this requires going
10969 # via the node before it's locked, requiring verification later on
10970 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10971 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10973 elif level == locking.LEVEL_NODE:
10974 if self.op.iallocator is not None:
10975 assert self.op.remote_node is None
10976 assert not self.needed_locks[locking.LEVEL_NODE]
10978 # Lock member nodes of all locked groups
10979 self.needed_locks[locking.LEVEL_NODE] = \
10981 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10982 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10984 self._LockInstancesNodes()
10985 elif level == locking.LEVEL_NODE_RES:
10987 self.needed_locks[locking.LEVEL_NODE_RES] = \
10988 self.needed_locks[locking.LEVEL_NODE]
10990 def BuildHooksEnv(self):
10991 """Build hooks env.
10993 This runs on the master, the primary and all the secondaries.
10996 instance = self.replacer.instance
10998 "MODE": self.op.mode,
10999 "NEW_SECONDARY": self.op.remote_node,
11000 "OLD_SECONDARY": instance.secondary_nodes[0],
11002 env.update(_BuildInstanceHookEnvByObject(self, instance))
11005 def BuildHooksNodes(self):
11006 """Build hooks nodes.
11009 instance = self.replacer.instance
11011 self.cfg.GetMasterNode(),
11012 instance.primary_node,
11014 if self.op.remote_node is not None:
11015 nl.append(self.op.remote_node)
11018 def CheckPrereq(self):
11019 """Check prerequisites.
11022 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11023 self.op.iallocator is None)
11025 # Verify if node group locks are still correct
11026 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11028 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11030 return LogicalUnit.CheckPrereq(self)
11033 class TLReplaceDisks(Tasklet):
11034 """Replaces disks for an instance.
11036 Note: Locking is not within the scope of this class.
11039 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11040 disks, early_release, ignore_ipolicy):
11041 """Initializes this class.
11044 Tasklet.__init__(self, lu)
11047 self.instance_name = instance_name
11049 self.iallocator_name = iallocator_name
11050 self.remote_node = remote_node
11052 self.early_release = early_release
11053 self.ignore_ipolicy = ignore_ipolicy
11056 self.instance = None
11057 self.new_node = None
11058 self.target_node = None
11059 self.other_node = None
11060 self.remote_node_info = None
11061 self.node_secondary_ip = None
11064 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11065 """Compute a new secondary node using an IAllocator.
11068 req = iallocator.IAReqRelocate(name=instance_name,
11069 relocate_from=list(relocate_from))
11070 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11072 ial.Run(iallocator_name)
11074 if not ial.success:
11075 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11076 " %s" % (iallocator_name, ial.info),
11077 errors.ECODE_NORES)
11079 remote_node_name = ial.result[0]
11081 lu.LogInfo("Selected new secondary for instance '%s': %s",
11082 instance_name, remote_node_name)
11084 return remote_node_name
11086 def _FindFaultyDisks(self, node_name):
11087 """Wrapper for L{_FindFaultyInstanceDisks}.
11090 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11093 def _CheckDisksActivated(self, instance):
11094 """Checks if the instance disks are activated.
11096 @param instance: The instance whose disks should be checked
11097 @return: True if they are activated, False otherwise
11100 nodes = instance.all_nodes
11102 for idx, dev in enumerate(instance.disks):
11104 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11105 self.cfg.SetDiskID(dev, node)
11107 result = _BlockdevFind(self, node, dev, instance)
11111 elif result.fail_msg or not result.payload:
11116 def CheckPrereq(self):
11117 """Check prerequisites.
11119 This checks that the instance is in the cluster.
11122 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11123 assert instance is not None, \
11124 "Cannot retrieve locked instance %s" % self.instance_name
11126 if instance.disk_template != constants.DT_DRBD8:
11127 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11128 " instances", errors.ECODE_INVAL)
11130 if len(instance.secondary_nodes) != 1:
11131 raise errors.OpPrereqError("The instance has a strange layout,"
11132 " expected one secondary but found %d" %
11133 len(instance.secondary_nodes),
11134 errors.ECODE_FAULT)
11136 instance = self.instance
11137 secondary_node = instance.secondary_nodes[0]
11139 if self.iallocator_name is None:
11140 remote_node = self.remote_node
11142 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11143 instance.name, instance.secondary_nodes)
11145 if remote_node is None:
11146 self.remote_node_info = None
11148 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11149 "Remote node '%s' is not locked" % remote_node
11151 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11152 assert self.remote_node_info is not None, \
11153 "Cannot retrieve locked node %s" % remote_node
11155 if remote_node == self.instance.primary_node:
11156 raise errors.OpPrereqError("The specified node is the primary node of"
11157 " the instance", errors.ECODE_INVAL)
11159 if remote_node == secondary_node:
11160 raise errors.OpPrereqError("The specified node is already the"
11161 " secondary node of the instance",
11162 errors.ECODE_INVAL)
11164 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11165 constants.REPLACE_DISK_CHG):
11166 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11167 errors.ECODE_INVAL)
11169 if self.mode == constants.REPLACE_DISK_AUTO:
11170 if not self._CheckDisksActivated(instance):
11171 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11172 " first" % self.instance_name,
11173 errors.ECODE_STATE)
11174 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11175 faulty_secondary = self._FindFaultyDisks(secondary_node)
11177 if faulty_primary and faulty_secondary:
11178 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11179 " one node and can not be repaired"
11180 " automatically" % self.instance_name,
11181 errors.ECODE_STATE)
11184 self.disks = faulty_primary
11185 self.target_node = instance.primary_node
11186 self.other_node = secondary_node
11187 check_nodes = [self.target_node, self.other_node]
11188 elif faulty_secondary:
11189 self.disks = faulty_secondary
11190 self.target_node = secondary_node
11191 self.other_node = instance.primary_node
11192 check_nodes = [self.target_node, self.other_node]
11198 # Non-automatic modes
11199 if self.mode == constants.REPLACE_DISK_PRI:
11200 self.target_node = instance.primary_node
11201 self.other_node = secondary_node
11202 check_nodes = [self.target_node, self.other_node]
11204 elif self.mode == constants.REPLACE_DISK_SEC:
11205 self.target_node = secondary_node
11206 self.other_node = instance.primary_node
11207 check_nodes = [self.target_node, self.other_node]
11209 elif self.mode == constants.REPLACE_DISK_CHG:
11210 self.new_node = remote_node
11211 self.other_node = instance.primary_node
11212 self.target_node = secondary_node
11213 check_nodes = [self.new_node, self.other_node]
11215 _CheckNodeNotDrained(self.lu, remote_node)
11216 _CheckNodeVmCapable(self.lu, remote_node)
11218 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11219 assert old_node_info is not None
11220 if old_node_info.offline and not self.early_release:
11221 # doesn't make sense to delay the release
11222 self.early_release = True
11223 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11224 " early-release mode", secondary_node)
11227 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11230 # If not specified all disks should be replaced
11232 self.disks = range(len(self.instance.disks))
11234 # TODO: This is ugly, but right now we can't distinguish between an
11235 # internally submitted opcode and an external one. We should fix that.
11236 if self.remote_node_info:
11237 # We change the node; let's verify it still meets the instance policy
11238 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11239 cluster = self.cfg.GetClusterInfo()
11240 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11242 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11243 ignore=self.ignore_ipolicy)
11245 for node in check_nodes:
11246 _CheckNodeOnline(self.lu, node)
11248 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11251 if node_name is not None)
11253 # Release unneeded node and node resource locks
11254 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11255 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11257 # Release any owned node group
11258 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11259 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11261 # Check whether disks are valid
11262 for disk_idx in self.disks:
11263 instance.FindDisk(disk_idx)
11265 # Get secondary node IP addresses
11266 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11267 in self.cfg.GetMultiNodeInfo(touched_nodes))
11269 def Exec(self, feedback_fn):
11270 """Execute disk replacement.
11272 This dispatches the disk replacement to the appropriate handler.
11276 # Verify owned locks before starting operation
11277 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11278 assert set(owned_nodes) == set(self.node_secondary_ip), \
11279 ("Incorrect node locks, owning %s, expected %s" %
11280 (owned_nodes, self.node_secondary_ip.keys()))
11281 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11282 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11284 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11285 assert list(owned_instances) == [self.instance_name], \
11286 "Instance '%s' not locked" % self.instance_name
11288 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11289 "Should not own any node group lock at this point"
11292 feedback_fn("No disks need replacement for instance '%s'" %
11293 self.instance.name)
11296 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11297 (utils.CommaJoin(self.disks), self.instance.name))
11298 feedback_fn("Current primary node: %s", self.instance.primary_node)
11299 feedback_fn("Current seconary node: %s",
11300 utils.CommaJoin(self.instance.secondary_nodes))
11302 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11304 # Activate the instance disks if we're replacing them on a down instance
11306 _StartInstanceDisks(self.lu, self.instance, True)
11309 # Should we replace the secondary node?
11310 if self.new_node is not None:
11311 fn = self._ExecDrbd8Secondary
11313 fn = self._ExecDrbd8DiskOnly
11315 result = fn(feedback_fn)
11317 # Deactivate the instance disks if we're replacing them on a
11320 _SafeShutdownInstanceDisks(self.lu, self.instance)
11322 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11325 # Verify owned locks
11326 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11327 nodes = frozenset(self.node_secondary_ip)
11328 assert ((self.early_release and not owned_nodes) or
11329 (not self.early_release and not (set(owned_nodes) - nodes))), \
11330 ("Not owning the correct locks, early_release=%s, owned=%r,"
11331 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11335 def _CheckVolumeGroup(self, nodes):
11336 self.lu.LogInfo("Checking volume groups")
11338 vgname = self.cfg.GetVGName()
11340 # Make sure volume group exists on all involved nodes
11341 results = self.rpc.call_vg_list(nodes)
11343 raise errors.OpExecError("Can't list volume groups on the nodes")
11346 res = results[node]
11347 res.Raise("Error checking node %s" % node)
11348 if vgname not in res.payload:
11349 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11352 def _CheckDisksExistence(self, nodes):
11353 # Check disk existence
11354 for idx, dev in enumerate(self.instance.disks):
11355 if idx not in self.disks:
11359 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
11360 self.cfg.SetDiskID(dev, node)
11362 result = _BlockdevFind(self, node, dev, self.instance)
11364 msg = result.fail_msg
11365 if msg or not result.payload:
11367 msg = "disk not found"
11368 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11371 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11372 for idx, dev in enumerate(self.instance.disks):
11373 if idx not in self.disks:
11376 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11379 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11380 on_primary, ldisk=ldisk):
11381 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11382 " replace disks for instance %s" %
11383 (node_name, self.instance.name))
11385 def _CreateNewStorage(self, node_name):
11386 """Create new storage on the primary or secondary node.
11388 This is only used for same-node replaces, not for changing the
11389 secondary node, hence we don't want to modify the existing disk.
11394 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11395 for idx, dev in enumerate(disks):
11396 if idx not in self.disks:
11399 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11401 self.cfg.SetDiskID(dev, node_name)
11403 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11404 names = _GenerateUniqueNames(self.lu, lv_names)
11406 (data_disk, meta_disk) = dev.children
11407 vg_data = data_disk.logical_id[0]
11408 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11409 logical_id=(vg_data, names[0]),
11410 params=data_disk.params)
11411 vg_meta = meta_disk.logical_id[0]
11412 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11413 size=constants.DRBD_META_SIZE,
11414 logical_id=(vg_meta, names[1]),
11415 params=meta_disk.params)
11417 new_lvs = [lv_data, lv_meta]
11418 old_lvs = [child.Copy() for child in dev.children]
11419 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11421 # we pass force_create=True to force the LVM creation
11422 for new_lv in new_lvs:
11423 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11424 _GetInstanceInfoText(self.instance), False)
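# A sketch of the iv_names mapping built above, with hypothetical values for a
# single DRBD disk named "disk/0":
#
#   iv_names = {
#     "disk/0": (drbd_disk,                   # the DRBD device object
#                [old_data_lv, old_meta_lv],  # children copied before rename
#                [new_data_lv, new_meta_lv]), # freshly created LVs
#   }
#
# _CheckDevices and _RemoveOldStorage below consume exactly this shape.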
11428 def _CheckDevices(self, node_name, iv_names):
11429 for name, (dev, _, _) in iv_names.iteritems():
11430 self.cfg.SetDiskID(dev, node_name)
11432 result = _BlockdevFind(self, node_name, dev, self.instance)
11434 msg = result.fail_msg
11435 if msg or not result.payload:
11437 msg = "disk not found"
11438 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11441 if result.payload.is_degraded:
11442 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11444 def _RemoveOldStorage(self, node_name, iv_names):
11445 for name, (_, old_lvs, _) in iv_names.iteritems():
11446 self.lu.LogInfo("Remove logical volumes for %s" % name)
11449 self.cfg.SetDiskID(lv, node_name)
11451 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11453 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11454 hint="remove unused LVs manually")
11456 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11457 """Replace a disk on the primary or secondary for DRBD 8.
11459 The algorithm for replace is quite complicated:
11461 1. for each disk to be replaced:
11463 1. create new LVs on the target node with unique names
11464 1. detach old LVs from the drbd device
11465 1. rename old LVs to name_replaced.<time_t>
11466 1. rename new LVs to old LVs
11467 1. attach the new LVs (with the old names now) to the drbd device
11469 1. wait for sync across all devices
11471 1. for each modified disk:
11473 1. remove old LVs (which have the name name_replaced.<time_t>)
11475 Failures are not very well handled.
11480 # Step: check device activation
11481 self.lu.LogStep(1, steps_total, "Check device existence")
11482 self._CheckDisksExistence([self.other_node, self.target_node])
11483 self._CheckVolumeGroup([self.target_node, self.other_node])
11485 # Step: check other node consistency
11486 self.lu.LogStep(2, steps_total, "Check peer consistency")
11487 self._CheckDisksConsistency(self.other_node,
11488 self.other_node == self.instance.primary_node,
11491 # Step: create new storage
11492 self.lu.LogStep(3, steps_total, "Allocate new storage")
11493 iv_names = self._CreateNewStorage(self.target_node)
11495 # Step: for each lv, detach+rename*2+attach
11496 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11497 for dev, old_lvs, new_lvs in iv_names.itervalues():
11498 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11500 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11502 result.Raise("Can't detach drbd from local storage on node"
11503 " %s for device %s" % (self.target_node, dev.iv_name))
11505 #cfg.Update(instance)
11507 # ok, we created the new LVs, so now we know we have the needed
11508 # storage; as such, we proceed on the target node to rename
11509 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11510 # using the assumption that logical_id == physical_id (which in
11511 # turn is the unique_id on that node)
11513 # FIXME(iustin): use a better name for the replaced LVs
11514 temp_suffix = int(time.time())
11515 ren_fn = lambda d, suff: (d.physical_id[0],
11516 d.physical_id[1] + "_replaced-%s" % suff)
11518 # Build the rename list based on what LVs exist on the node
11519 rename_old_to_new = []
11520 for to_ren in old_lvs:
11521 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11522 if not result.fail_msg and result.payload:
11524 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11526 self.lu.LogInfo("Renaming the old LVs on the target node")
11527 result = self.rpc.call_blockdev_rename(self.target_node,
11529 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11531 # Now we rename the new LVs to the old LVs
11532 self.lu.LogInfo("Renaming the new LVs on the target node")
11533 rename_new_to_old = [(new, old.physical_id)
11534 for old, new in zip(old_lvs, new_lvs)]
11535 result = self.rpc.call_blockdev_rename(self.target_node,
11537 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11539 # Intermediate steps of in memory modifications
11540 for old, new in zip(old_lvs, new_lvs):
11541 new.logical_id = old.logical_id
11542 self.cfg.SetDiskID(new, self.target_node)
11544 # We need to modify old_lvs so that removal later removes the
11545 # right LVs, not the newly added ones; note that old_lvs is a
11547 for disk in old_lvs:
11548 disk.logical_id = ren_fn(disk, temp_suffix)
11549 self.cfg.SetDiskID(disk, self.target_node)
11551 # Now that the new lvs have the old name, we can add them to the device
11552 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11553 result = self.rpc.call_blockdev_addchildren(self.target_node,
11554 (dev, self.instance), new_lvs)
11555 msg = result.fail_msg
11557 for new_lv in new_lvs:
11558 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11561 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11562 hint=("cleanup manually the unused logical"
11564 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11566 cstep = itertools.count(5)
11568 if self.early_release:
11569 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11570 self._RemoveOldStorage(self.target_node, iv_names)
11571 # TODO: Check if releasing locks early still makes sense
11572 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11574 # Release all resource locks except those used by the instance
11575 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11576 keep=self.node_secondary_ip.keys())
11578 # Release all node locks while waiting for sync
11579 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11581 # TODO: Can the instance lock be downgraded here? Take the optional disk
11582 # shutdown in the caller into consideration.
11585 # This can fail as the old devices are degraded and _WaitForSync
11586 # does a combined result over all disks, so we don't check its return value
11587 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11588 _WaitForSync(self.lu, self.instance)
11590 # Check all devices manually
11591 self._CheckDevices(self.instance.primary_node, iv_names)
11593 # Step: remove old storage
11594 if not self.early_release:
11595 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11596 self._RemoveOldStorage(self.target_node, iv_names)
11598 def _ExecDrbd8Secondary(self, feedback_fn):
11599 """Replace the secondary node for DRBD 8.
11601 The algorithm for replace is quite complicated:
11602 - for all disks of the instance:
11603 - create new LVs on the new node with same names
11604 - shutdown the drbd device on the old secondary
11605 - disconnect the drbd network on the primary
11606 - create the drbd device on the new secondary
11607 - network attach the drbd on the primary, using an artifice:
11608 the drbd code for Attach() will connect to the network if it
11609 finds a device which is connected to the good local disks but
11610 not network enabled
11611 - wait for sync across all devices
11612 - remove all disks from the old secondary
11614 Failures are not very well handled.
11619 pnode = self.instance.primary_node
11621 # Step: check device activation
11622 self.lu.LogStep(1, steps_total, "Check device existence")
11623 self._CheckDisksExistence([self.instance.primary_node])
11624 self._CheckVolumeGroup([self.instance.primary_node])
11626 # Step: check other node consistency
11627 self.lu.LogStep(2, steps_total, "Check peer consistency")
11628 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11630 # Step: create new storage
11631 self.lu.LogStep(3, steps_total, "Allocate new storage")
11632 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11633 for idx, dev in enumerate(disks):
11634 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11635 (self.new_node, idx))
11636 # we pass force_create=True to force LVM creation
11637 for new_lv in dev.children:
11638 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11639 True, _GetInstanceInfoText(self.instance), False)
11641 # Step 4: drbd minors and drbd setup changes
11642 # after this, we must manually remove the drbd minors on both the
11643 # error and the success paths
11644 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11645 minors = self.cfg.AllocateDRBDMinor([self.new_node
11646 for dev in self.instance.disks],
11647 self.instance.name)
11648 logging.debug("Allocated minors %r", minors)
11651 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11652 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11653 (self.new_node, idx))
11654 # create new devices on new_node; note that we create two IDs:
11655 # one without port, so the drbd will be activated without
11656 # networking information on the new node at this stage, and one
11657 # with network, for the latter activation in step 4
11658 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11659 if self.instance.primary_node == o_node1:
11662 assert self.instance.primary_node == o_node2, "Three-node instance?"
11665 new_alone_id = (self.instance.primary_node, self.new_node, None,
11666 p_minor, new_minor, o_secret)
11667 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11668 p_minor, new_minor, o_secret)
11670 iv_names[idx] = (dev, dev.children, new_net_id)
11671 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11673 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11674 logical_id=new_alone_id,
11675 children=dev.children,
11678 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11681 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11683 _GetInstanceInfoText(self.instance), False)
11684 except errors.GenericError:
11685 self.cfg.ReleaseDRBDMinors(self.instance.name)
11688 # We have new devices, shutdown the drbd on the old secondary
11689 for idx, dev in enumerate(self.instance.disks):
11690 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11691 self.cfg.SetDiskID(dev, self.target_node)
11692 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11693 (dev, self.instance)).fail_msg
11695 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11696 "node: %s" % (idx, msg),
11697 hint=("Please cleanup this device manually as"
11698 " soon as possible"))
11700 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11701 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11702 self.instance.disks)[pnode]
11704 msg = result.fail_msg
11706 # detaches didn't succeed (unlikely)
11707 self.cfg.ReleaseDRBDMinors(self.instance.name)
11708 raise errors.OpExecError("Can't detach the disks from the network on"
11709 " old node: %s" % (msg,))
11711 # if we managed to detach at least one, we update all the disks of
11712 # the instance to point to the new secondary
11713 self.lu.LogInfo("Updating instance configuration")
11714 for dev, _, new_logical_id in iv_names.itervalues():
11715 dev.logical_id = new_logical_id
11716 self.cfg.SetDiskID(dev, self.instance.primary_node)
11718 self.cfg.Update(self.instance, feedback_fn)
11720 # Release all node locks (the configuration has been updated)
11721 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11723 # and now perform the drbd attach
11724 self.lu.LogInfo("Attaching primary drbds to new secondary"
11725 " (standalone => connected)")
11726 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11728 self.node_secondary_ip,
11729 (self.instance.disks, self.instance),
11730 self.instance.name,
11732 for to_node, to_result in result.items():
11733 msg = to_result.fail_msg
11735 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11737 hint=("please do a gnt-instance info to see the"
11738 " status of disks"))
11740 cstep = itertools.count(5)
11742 if self.early_release:
11743 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11744 self._RemoveOldStorage(self.target_node, iv_names)
11745 # TODO: Check if releasing locks early still makes sense
11746 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11748 # Release all resource locks except those used by the instance
11749 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11750 keep=self.node_secondary_ip.keys())
11752 # TODO: Can the instance lock be downgraded here? Take the optional disk
11753 # shutdown in the caller into consideration.
11756 # This can fail as the old devices are degraded and _WaitForSync
11757 # does a combined result over all disks, so we don't check its return value
11758 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11759 _WaitForSync(self.lu, self.instance)
11761 # Check all devices manually
11762 self._CheckDevices(self.instance.primary_node, iv_names)
11764 # Step: remove old storage
11765 if not self.early_release:
11766 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11767 self._RemoveOldStorage(self.target_node, iv_names)
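# Illustrative sketch (made-up values) of the DRBD logical_id tuples handled in
# _ExecDrbd8Secondary above; a DRBD8 disk's logical_id is
# (node_a, node_b, port, minor_a, minor_b, secret), and the "alone" variant
# drops the port so the device is brought up without networking first:
#
#   old_id       = ("node1", "node2", 11000, 0, 0, "secret")
#   new_alone_id = ("node1", "node3", None,  0, 5, "secret")  # no network yet
#   new_net_id   = ("node1", "node3", 11000, 0, 5, "secret")  # attached later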
11770 class LURepairNodeStorage(NoHooksLU):
11771 """Repairs the volume group on a node.
11776 def CheckArguments(self):
11777 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11779 storage_type = self.op.storage_type
11781 if (constants.SO_FIX_CONSISTENCY not in
11782 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11783 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11784 " repaired" % storage_type,
11785 errors.ECODE_INVAL)
11787 def ExpandNames(self):
11788 self.needed_locks = {
11789 locking.LEVEL_NODE: [self.op.node_name],
11792 def _CheckFaultyDisks(self, instance, node_name):
11793 """Ensure faulty disks abort the opcode or at least warn."""
11795 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11797 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11798 " node '%s'" % (instance.name, node_name),
11799 errors.ECODE_STATE)
11800 except errors.OpPrereqError, err:
11801 if self.op.ignore_consistency:
11802 self.proc.LogWarning(str(err.args[0]))
11806 def CheckPrereq(self):
11807 """Check prerequisites.
11810 # Check whether any instance on this node has faulty disks
11811 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11812 if inst.admin_state != constants.ADMINST_UP:
11814 check_nodes = set(inst.all_nodes)
11815 check_nodes.discard(self.op.node_name)
11816 for inst_node_name in check_nodes:
11817 self._CheckFaultyDisks(inst, inst_node_name)
11819 def Exec(self, feedback_fn):
11820 feedback_fn("Repairing storage unit '%s' on %s ..." %
11821 (self.op.name, self.op.node_name))
11823 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11824 result = self.rpc.call_storage_execute(self.op.node_name,
11825 self.op.storage_type, st_args,
11827 constants.SO_FIX_CONSISTENCY)
11828 result.Raise("Failed to repair storage unit '%s' on %s" %
11829 (self.op.name, self.op.node_name))
11832 class LUNodeEvacuate(NoHooksLU):
11833 """Evacuates instances off a list of nodes.
11838 _MODE2IALLOCATOR = {
11839 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11840 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11841 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11843 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11844 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11845 constants.IALLOCATOR_NEVAC_MODES)
11847 def CheckArguments(self):
11848 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11850 def ExpandNames(self):
11851 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11853 if self.op.remote_node is not None:
11854 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11855 assert self.op.remote_node
11857 if self.op.remote_node == self.op.node_name:
11858 raise errors.OpPrereqError("Can not use evacuated node as a new"
11859 " secondary node", errors.ECODE_INVAL)
11861 if self.op.mode != constants.NODE_EVAC_SEC:
11862 raise errors.OpPrereqError("Without the use of an iallocator only"
11863 " secondary instances can be evacuated",
11864 errors.ECODE_INVAL)
11867 self.share_locks = _ShareAll()
11868 self.needed_locks = {
11869 locking.LEVEL_INSTANCE: [],
11870 locking.LEVEL_NODEGROUP: [],
11871 locking.LEVEL_NODE: [],
11874 # Determine nodes (via group) optimistically, needs verification once locks
11875 # have been acquired
11876 self.lock_nodes = self._DetermineNodes()
11878 def _DetermineNodes(self):
11879 """Gets the list of nodes to operate on.
11882 if self.op.remote_node is None:
11883 # Iallocator will choose any node(s) in the same group
11884 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11886 group_nodes = frozenset([self.op.remote_node])
11888 # Determine nodes to be locked
11889 return set([self.op.node_name]) | group_nodes
11891 def _DetermineInstances(self):
11892 """Builds list of instances to operate on.
11895 assert self.op.mode in constants.NODE_EVAC_MODES
11897 if self.op.mode == constants.NODE_EVAC_PRI:
11898 # Primary instances only
11899 inst_fn = _GetNodePrimaryInstances
11900 assert self.op.remote_node is None, \
11901 "Evacuating primary instances requires iallocator"
11902 elif self.op.mode == constants.NODE_EVAC_SEC:
11903 # Secondary instances only
11904 inst_fn = _GetNodeSecondaryInstances
11907 assert self.op.mode == constants.NODE_EVAC_ALL
11908 inst_fn = _GetNodeInstances
11909 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11911 raise errors.OpPrereqError("Due to an issue with the iallocator"
11912 " interface it is not possible to evacuate"
11913 " all instances at once; specify explicitly"
11914 " whether to evacuate primary or secondary"
11916 errors.ECODE_INVAL)
11918 return inst_fn(self.cfg, self.op.node_name)
11920 def DeclareLocks(self, level):
11921 if level == locking.LEVEL_INSTANCE:
11922 # Lock instances optimistically, needs verification once node and group
11923 # locks have been acquired
11924 self.needed_locks[locking.LEVEL_INSTANCE] = \
11925 set(i.name for i in self._DetermineInstances())
11927 elif level == locking.LEVEL_NODEGROUP:
11928 # Lock node groups for all potential target nodes optimistically, needs
11929 # verification once nodes have been acquired
11930 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11931 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11933 elif level == locking.LEVEL_NODE:
11934 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11936 def CheckPrereq(self):
11938 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11939 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11940 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11942 need_nodes = self._DetermineNodes()
11944 if not owned_nodes.issuperset(need_nodes):
11945 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11946 " locks were acquired, current nodes are"
11947 " are '%s', used to be '%s'; retry the"
11949 (self.op.node_name,
11950 utils.CommaJoin(need_nodes),
11951 utils.CommaJoin(owned_nodes)),
11952 errors.ECODE_STATE)
11954 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11955 if owned_groups != wanted_groups:
11956 raise errors.OpExecError("Node groups changed since locks were acquired,"
11957 " current groups are '%s', used to be '%s';"
11958 " retry the operation" %
11959 (utils.CommaJoin(wanted_groups),
11960 utils.CommaJoin(owned_groups)))
11962 # Determine affected instances
11963 self.instances = self._DetermineInstances()
11964 self.instance_names = [i.name for i in self.instances]
11966 if set(self.instance_names) != owned_instances:
11967 raise errors.OpExecError("Instances on node '%s' changed since locks"
11968 " were acquired, current instances are '%s',"
11969 " used to be '%s'; retry the operation" %
11970 (self.op.node_name,
11971 utils.CommaJoin(self.instance_names),
11972 utils.CommaJoin(owned_instances)))
11974 if self.instance_names:
11975 self.LogInfo("Evacuating instances from node '%s': %s",
11977 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11979 self.LogInfo("No instances to evacuate from node '%s'",
11982 if self.op.remote_node is not None:
11983 for i in self.instances:
11984 if i.primary_node == self.op.remote_node:
11985 raise errors.OpPrereqError("Node %s is the primary node of"
11986 " instance %s, cannot use it as"
11988 (self.op.remote_node, i.name),
11989 errors.ECODE_INVAL)
11991 def Exec(self, feedback_fn):
11992 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11994 if not self.instance_names:
11995 # No instances to evacuate
11998 elif self.op.iallocator is not None:
11999 # TODO: Implement relocation to other group
12000 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12001 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12002 instances=list(self.instance_names))
12003 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12005 ial.Run(self.op.iallocator)
12007 if not ial.success:
12008 raise errors.OpPrereqError("Can't compute node evacuation using"
12009 " iallocator '%s': %s" %
12010 (self.op.iallocator, ial.info),
12011 errors.ECODE_NORES)
12013 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12015 elif self.op.remote_node is not None:
12016 assert self.op.mode == constants.NODE_EVAC_SEC
12018 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12019 remote_node=self.op.remote_node,
12021 mode=constants.REPLACE_DISK_CHG,
12022 early_release=self.op.early_release)]
12023 for instance_name in self.instance_names]
12026 raise errors.ProgrammerError("No iallocator or remote node")
12028 return ResultWithJobs(jobs)
12031 def _SetOpEarlyRelease(early_release, op):
12032 """Sets C{early_release} flag on opcodes if available.
12036 op.early_release = early_release
12037 except AttributeError:
12038 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12043 def _NodeEvacDest(use_nodes, group, nodes):
12044 """Returns group or nodes depending on caller's choice.
12048 return utils.CommaJoin(nodes)
12053 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12054 """Unpacks the result of change-group and node-evacuate iallocator requests.
12056 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12057 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12059 @type lu: L{LogicalUnit}
12060 @param lu: Logical unit instance
12061 @type alloc_result: tuple/list
12062 @param alloc_result: Result from iallocator
12063 @type early_release: bool
12064 @param early_release: Whether to release locks early if possible
12065 @type use_nodes: bool
12066 @param use_nodes: Whether to display node names instead of groups
12069 (moved, failed, jobs) = alloc_result
12072 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12073 for (name, reason) in failed)
12074 lu.LogWarning("Unable to evacuate instances %s", failreason)
12075 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12078 lu.LogInfo("Instances to be moved: %s",
12079 utils.CommaJoin("%s (to %s)" %
12080 (name, _NodeEvacDest(use_nodes, group, nodes))
12081 for (name, group, nodes) in moved))
12083 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12084 map(opcodes.OpCode.LoadOpCode, ops))
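# A minimal sketch of the iallocator result consumed by _LoadNodeEvacResult,
# with made-up instance, node and group names:
#
#   alloc_result = (
#     [("inst1", "group1", ["node3"])],       # moved
#     [("inst2", "no space on any node")],    # failed
#     [[{"OP_ID": "OP_INSTANCE_MIGRATE"}]],   # jobs: serialized opcodes
#   )
#
# Each inner job list is deserialized with opcodes.OpCode.LoadOpCode and the
# early_release flag is applied via _SetOpEarlyRelease.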
12088 def _DiskSizeInBytesToMebibytes(lu, size):
12089 """Converts a disk size in bytes to mebibytes.
12091 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12094 (mib, remainder) = divmod(size, 1024 * 1024)
12097 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12098 " to not overwrite existing data (%s bytes will not be"
12099 " wiped)", (1024 * 1024) - remainder)
12105 class LUInstanceGrowDisk(LogicalUnit):
12106 """Grow a disk of an instance.
12109 HPATH = "disk-grow"
12110 HTYPE = constants.HTYPE_INSTANCE
12113 def ExpandNames(self):
12114 self._ExpandAndLockInstance()
12115 self.needed_locks[locking.LEVEL_NODE] = []
12116 self.needed_locks[locking.LEVEL_NODE_RES] = []
12117 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12118 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12120 def DeclareLocks(self, level):
12121 if level == locking.LEVEL_NODE:
12122 self._LockInstancesNodes()
12123 elif level == locking.LEVEL_NODE_RES:
12125 self.needed_locks[locking.LEVEL_NODE_RES] = \
12126 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12128 def BuildHooksEnv(self):
12129 """Build hooks env.
12131 This runs on the master, the primary and all the secondaries.
12135 "DISK": self.op.disk,
12136 "AMOUNT": self.op.amount,
12137 "ABSOLUTE": self.op.absolute,
12139 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12142 def BuildHooksNodes(self):
12143 """Build hooks nodes.
12146 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12149 def CheckPrereq(self):
12150 """Check prerequisites.
12152 This checks that the instance is in the cluster.
12155 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12156 assert instance is not None, \
12157 "Cannot retrieve locked instance %s" % self.op.instance_name
12158 nodenames = list(instance.all_nodes)
12159 for node in nodenames:
12160 _CheckNodeOnline(self, node)
12162 self.instance = instance
12164 if instance.disk_template not in constants.DTS_GROWABLE:
12165 raise errors.OpPrereqError("Instance's disk layout does not support"
12166 " growing", errors.ECODE_INVAL)
12168 self.disk = instance.FindDisk(self.op.disk)
12170 if self.op.absolute:
12171 self.target = self.op.amount
12172 self.delta = self.target - self.disk.size
12174 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12175 "current disk size (%s)" %
12176 (utils.FormatUnit(self.target, "h"),
12177 utils.FormatUnit(self.disk.size, "h")),
12178 errors.ECODE_STATE)
12180 self.delta = self.op.amount
12181 self.target = self.disk.size + self.delta
12183 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12184 utils.FormatUnit(self.delta, "h"),
12185 errors.ECODE_INVAL)
12187 if instance.disk_template not in (constants.DT_FILE,
12188 constants.DT_SHARED_FILE,
12190 # TODO: check the free disk space for file, when that feature will be
12192 _CheckNodesFreeDiskPerVG(self, nodenames,
12193 self.disk.ComputeGrowth(self.delta))
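# Worked example of the absolute/relative growth logic above, with made-up
# numbers for a disk whose current size is 10240 MiB:
#
#   op.amount=2048,  op.absolute=False  ->  delta = 2048, target = 12288
#   op.amount=12288, op.absolute=True   ->  delta = 12288 - 10240 = 2048,
#                                           target = 12288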
12195 def Exec(self, feedback_fn):
12196 """Execute disk grow.
12199 instance = self.instance
12202 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12203 assert (self.owned_locks(locking.LEVEL_NODE) ==
12204 self.owned_locks(locking.LEVEL_NODE_RES))
12206 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12208 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12210 raise errors.OpExecError("Cannot activate block device to grow")
12212 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12213 (self.op.disk, instance.name,
12214 utils.FormatUnit(self.delta, "h"),
12215 utils.FormatUnit(self.target, "h")))
12217 # First run all grow ops in dry-run mode
12218 for node in instance.all_nodes:
12219 self.cfg.SetDiskID(disk, node)
12220 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12222 result.Raise("Dry-run grow request failed to node %s" % node)
12225 # Get disk size from primary node for wiping
12226 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12227 result.Raise("Failed to retrieve disk size from node '%s'" %
12228 instance.primary_node)
12230 (disk_size_in_bytes, ) = result.payload
12232 if disk_size_in_bytes is None:
12233 raise errors.OpExecError("Failed to retrieve disk size from primary"
12234 " node '%s'" % instance.primary_node)
12236 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12238 assert old_disk_size >= disk.size, \
12239 ("Retrieved disk size too small (got %s, should be at least %s)" %
12240 (old_disk_size, disk.size))
12242 old_disk_size = None
12244 # We know that (as far as we can test) operations across different
12245 # nodes will succeed, time to run it for real on the backing storage
12246 for node in instance.all_nodes:
12247 self.cfg.SetDiskID(disk, node)
12248 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12250 result.Raise("Grow request failed to node %s" % node)
12252 # And now execute it for logical storage, on the primary node
12253 node = instance.primary_node
12254 self.cfg.SetDiskID(disk, node)
12255 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12257 result.Raise("Grow request failed to node %s" % node)
12259 disk.RecordGrow(self.delta)
12260 self.cfg.Update(instance, feedback_fn)
12262 # Changes have been recorded, release node lock
12263 _ReleaseLocks(self, locking.LEVEL_NODE)
12265 # Downgrade lock while waiting for sync
12266 self.glm.downgrade(locking.LEVEL_INSTANCE)
12268 assert wipe_disks ^ (old_disk_size is None)
12271 assert instance.disks[self.op.disk] == disk
12273 # Wipe newly added disk space
12274 _WipeDisks(self, instance,
12275 disks=[(self.op.disk, disk, old_disk_size)])
12277 if self.op.wait_for_sync:
12278 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12280 self.proc.LogWarning("Disk sync-ing has not returned a good"
12281 " status; please check the instance")
12282 if instance.admin_state != constants.ADMINST_UP:
12283 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12284 elif instance.admin_state != constants.ADMINST_UP:
12285 self.proc.LogWarning("Not shutting down the disk even if the instance is"
12286 " not supposed to be running because no wait for"
12287 " sync mode was requested")
12289 assert self.owned_locks(locking.LEVEL_NODE_RES)
12290 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12293 class LUInstanceQueryData(NoHooksLU):
12294 """Query runtime instance data.
12299 def ExpandNames(self):
12300 self.needed_locks = {}
12302 # Use locking if requested or when non-static information is wanted
12303 if not (self.op.static or self.op.use_locking):
12304 self.LogWarning("Non-static data requested, locks need to be acquired")
12305 self.op.use_locking = True
12307 if self.op.instances or not self.op.use_locking:
12308 # Expand instance names right here
12309 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12311 # Will use acquired locks
12312 self.wanted_names = None
12314 if self.op.use_locking:
12315 self.share_locks = _ShareAll()
12317 if self.wanted_names is None:
12318 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12320 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12322 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12323 self.needed_locks[locking.LEVEL_NODE] = []
12324 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12326 def DeclareLocks(self, level):
12327 if self.op.use_locking:
12328 if level == locking.LEVEL_NODEGROUP:
12329 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12331 # Lock all groups used by instances optimistically; this requires going
12332 # via the node before it's locked, requiring verification later on
12333 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12334 frozenset(group_uuid
12335 for instance_name in owned_instances
12337 self.cfg.GetInstanceNodeGroups(instance_name))
12339 elif level == locking.LEVEL_NODE:
12340 self._LockInstancesNodes()
12342 def CheckPrereq(self):
12343 """Check prerequisites.
12345 This only checks the optional instance list against the existing names.
12348 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12349 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12350 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12352 if self.wanted_names is None:
12353 assert self.op.use_locking, "Locking was not used"
12354 self.wanted_names = owned_instances
12356 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12358 if self.op.use_locking:
12359 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12362 assert not (owned_instances or owned_groups or owned_nodes)
12364 self.wanted_instances = instances.values()
12366 def _ComputeBlockdevStatus(self, node, instance, dev):
12367 """Returns the status of a block device
12370 if self.op.static or not node:
12373 self.cfg.SetDiskID(dev, node)
12375 result = self.rpc.call_blockdev_find(node, dev)
12379 result.Raise("Can't compute disk status for %s" % instance.name)
12381 status = result.payload
12385 return (status.dev_path, status.major, status.minor,
12386 status.sync_percent, status.estimated_time,
12387 status.is_degraded, status.ldisk_status)
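# A hedged example of the status tuple assembled above, with made-up values:
#
#   ("/dev/drbd0", 147, 0, 98.5, 120, False, constants.LDS_OKAY)
#
# i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status).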
12389 def _ComputeDiskStatus(self, instance, snode, dev):
12390 """Compute block device status.
12393 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12395 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12397 def _ComputeDiskStatusInner(self, instance, snode, dev):
12398 """Compute block device status.
12400 @attention: The device has to be annotated already.
12403 if dev.dev_type in constants.LDS_DRBD:
12404 # we change the snode then (otherwise we use the one passed in)
12405 if dev.logical_id[0] == instance.primary_node:
12406 snode = dev.logical_id[1]
12408 snode = dev.logical_id[0]
12410 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12412 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12415 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12422 "iv_name": dev.iv_name,
12423 "dev_type": dev.dev_type,
12424 "logical_id": dev.logical_id,
12425 "physical_id": dev.physical_id,
12426 "pstatus": dev_pstatus,
12427 "sstatus": dev_sstatus,
12428 "children": dev_children,
12433 def Exec(self, feedback_fn):
12434 """Gather and return data"""
12437 cluster = self.cfg.GetClusterInfo()
12439 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12440 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12442 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12443 for node in nodes.values()))
12445 group2name_fn = lambda uuid: groups[uuid].name
12447 for instance in self.wanted_instances:
12448 pnode = nodes[instance.primary_node]
12450 if self.op.static or pnode.offline:
12451 remote_state = None
12453 self.LogWarning("Primary node %s is marked offline, returning static"
12454 " information only for instance %s" %
12455 (pnode.name, instance.name))
12457 remote_info = self.rpc.call_instance_info(instance.primary_node,
12459 instance.hypervisor)
12460 remote_info.Raise("Error checking node %s" % instance.primary_node)
12461 remote_info = remote_info.payload
12462 if remote_info and "state" in remote_info:
12463 remote_state = "up"
12465 if instance.admin_state == constants.ADMINST_UP:
12466 remote_state = "down"
12468 remote_state = instance.admin_state
12470 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12473 snodes_group_uuids = [nodes[snode_name].group
12474 for snode_name in instance.secondary_nodes]
12476 result[instance.name] = {
12477 "name": instance.name,
12478 "config_state": instance.admin_state,
12479 "run_state": remote_state,
12480 "pnode": instance.primary_node,
12481 "pnode_group_uuid": pnode.group,
12482 "pnode_group_name": group2name_fn(pnode.group),
12483 "snodes": instance.secondary_nodes,
12484 "snodes_group_uuids": snodes_group_uuids,
12485 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12487 # this happens to be the same format used for hooks
12488 "nics": _NICListToTuple(self, instance.nics),
12489 "disk_template": instance.disk_template,
12491 "hypervisor": instance.hypervisor,
12492 "network_port": instance.network_port,
12493 "hv_instance": instance.hvparams,
12494 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12495 "be_instance": instance.beparams,
12496 "be_actual": cluster.FillBE(instance),
12497 "os_instance": instance.osparams,
12498 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12499 "serial_no": instance.serial_no,
12500 "mtime": instance.mtime,
12501 "ctime": instance.ctime,
12502 "uuid": instance.uuid,
12508 def PrepareContainerMods(mods, private_fn):
12509 """Prepares a list of container modifications by adding a private data field.
12511 @type mods: list of tuples; (operation, index, parameters)
12512 @param mods: List of modifications
12513 @type private_fn: callable or None
12514 @param private_fn: Callable for constructing a private data field for a
12519 if private_fn is None:
12524 return [(op, idx, params, fn()) for (op, idx, params) in mods]
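# A small sketch of what PrepareContainerMods produces, for a hypothetical
# one-element modification list:
#
#   mods = [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024})]
#   PrepareContainerMods(mods, _InstNicModPrivate)
#   # -> [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024},
#   #      <_InstNicModPrivate instance>)]
#
# With private_fn=None the (elided) branch presumably yields None as the
# private data field instead.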
12527 #: Type description for changes as returned by L{ApplyContainerMods}'s
12529 _TApplyContModsCbChanges = \
12530 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12531 ht.TNonEmptyString,
12536 def ApplyContainerMods(kind, container, chgdesc, mods,
12537 create_fn, modify_fn, remove_fn):
12538 """Applies descriptions in C{mods} to C{container}.
12541 @param kind: One-word item description
12542 @type container: list
12543 @param container: Container to modify
12544 @type chgdesc: None or list
12545 @param chgdesc: List of applied changes
12547 @param mods: Modifications as returned by L{PrepareContainerMods}
12548 @type create_fn: callable
12549 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12550 receives absolute item index, parameters and private data object as added
12551 by L{PrepareContainerMods}, returns tuple containing new item and changes
12553 @type modify_fn: callable
12554 @param modify_fn: Callback for modifying an existing item
12555 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12556 and private data object as added by L{PrepareContainerMods}, returns
12558 @type remove_fn: callable
12559 @param remove_fn: Callback on removing item; receives absolute item index,
12560 item and private data object as added by L{PrepareContainerMods}
12563 for (op, idx, params, private) in mods:
12566 absidx = len(container) - 1
12568 raise IndexError("Not accepting negative indices other than -1")
12569 elif idx > len(container):
12570 raise IndexError("Got %s index %s, but there are only %s" %
12571 (kind, idx, len(container)))
12577 if op == constants.DDM_ADD:
12578 # Calculate where item will be added
12580 addidx = len(container)
12584 if create_fn is None:
12587 (item, changes) = create_fn(addidx, params, private)
12590 container.append(item)
12593 assert idx <= len(container)
12594 # list.insert does so before the specified index
12595 container.insert(idx, item)
12597 # Retrieve existing item
12599 item = container[absidx]
12601 raise IndexError("Invalid %s index %s" % (kind, idx))
12603 if op == constants.DDM_REMOVE:
12606 if remove_fn is not None:
12607 remove_fn(absidx, item, private)
12609 changes = [("%s/%s" % (kind, absidx), "remove")]
12611 assert container[absidx] == item
12612 del container[absidx]
12613 elif op == constants.DDM_MODIFY:
12614 if modify_fn is not None:
12615 changes = modify_fn(absidx, item, params, private)
12617 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12619 assert _TApplyContModsCbChanges(changes)
12621 if not (chgdesc is None or changes is None):
12622 chgdesc.extend(changes)
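# A minimal usage sketch for ApplyContainerMods with hypothetical callbacks
# (the real callers pass the disk/NIC callbacks of LUInstanceSetParams):
#
#   chgdesc = []
#   container = ["a", "b"]
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, "c")], None)
#   ApplyContainerMods("test", container, chgdesc, mods,
#                      lambda idx, params, private: (params, []),
#                      None, None)
#   # container == ["a", "b", "c"]; chgdesc collects any reported changes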
12625 def _UpdateIvNames(base_index, disks):
12626 """Updates the C{iv_name} attribute of disks.
12628 @type disks: list of L{objects.Disk}
12631 for (idx, disk) in enumerate(disks):
12632 disk.iv_name = "disk/%s" % (base_index + idx, )
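# Example: after a disk has been inserted at index 1, a call such as
# _UpdateIvNames(1, instance.disks[1:]) renumbers the shifted disks to
# "disk/1", "disk/2", ... so iv_name matches each disk's new position.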
12635 class _InstNicModPrivate:
12636 """Data structure for network interface modifications.
12638 Used by L{LUInstanceSetParams}.
12641 def __init__(self):
12646 class LUInstanceSetParams(LogicalUnit):
12647 """Modifies an instances's parameters.
12650 HPATH = "instance-modify"
12651 HTYPE = constants.HTYPE_INSTANCE
12655 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12656 assert ht.TList(mods)
12657 assert not mods or len(mods[0]) in (2, 3)
12659 if mods and len(mods[0]) == 2:
12663 for op, params in mods:
12664 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12665 result.append((op, -1, params))
12669 raise errors.OpPrereqError("Only one %s add or remove operation is"
12670 " supported at a time" % kind,
12671 errors.ECODE_INVAL)
12673 result.append((constants.DDM_MODIFY, op, params))
12675 assert verify_fn(result)
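# Sketch of the legacy two-tuple upgrade performed above (hypothetical input):
#
#   [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024}), (2, {"mode": "rw"})]
#     -> [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024}),
#         (constants.DDM_MODIFY, 2, {"mode": "rw"})]
#
# Any op that is not an add/remove is treated as an index and becomes a
# DDM_MODIFY entry; more than one add or remove at a time is rejected.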
12682 def _CheckMods(kind, mods, key_types, item_fn):
12683 """Ensures requested disk/NIC modifications are valid.
12686 for (op, _, params) in mods:
12687 assert ht.TDict(params)
12689 utils.ForceDictType(params, key_types)
12691 if op == constants.DDM_REMOVE:
12693 raise errors.OpPrereqError("No settings should be passed when"
12694 " removing a %s" % kind,
12695 errors.ECODE_INVAL)
12696 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12697 item_fn(op, params)
12699 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12702 def _VerifyDiskModification(op, params):
12703 """Verifies a disk modification.
12706 if op == constants.DDM_ADD:
12707 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12708 if mode not in constants.DISK_ACCESS_SET:
12709 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12710 errors.ECODE_INVAL)
12712 size = params.get(constants.IDISK_SIZE, None)
12714 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12715 constants.IDISK_SIZE, errors.ECODE_INVAL)
12719 except (TypeError, ValueError), err:
12720 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12721 errors.ECODE_INVAL)
12723 params[constants.IDISK_SIZE] = size
12725 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12726 raise errors.OpPrereqError("Disk size change not possible, use"
12727 " grow-disk", errors.ECODE_INVAL)
12730 def _VerifyNicModification(op, params):
12731 """Verifies a network interface modification.
12734 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12735 ip = params.get(constants.INIC_IP, None)
12736 req_net = params.get(constants.INIC_NETWORK, None)
12737 link = params.get(constants.NIC_LINK, None)
12738 mode = params.get(constants.NIC_MODE, None)
12739 if req_net is not None:
12740 if req_net.lower() == constants.VALUE_NONE:
12741 params[constants.INIC_NETWORK] = None
12743 elif link is not None or mode is not None:
12744 raise errors.OpPrereqError("If network is given"
12745 " mode or link should not",
12746 errors.ECODE_INVAL)
12748 if op == constants.DDM_ADD:
12749 macaddr = params.get(constants.INIC_MAC, None)
12750 if macaddr is None:
12751 params[constants.INIC_MAC] = constants.VALUE_AUTO
12754 if ip.lower() == constants.VALUE_NONE:
12755 params[constants.INIC_IP] = None
12757 if ip.lower() == constants.NIC_IP_POOL:
12758 if op == constants.DDM_ADD and req_net is None:
12759 raise errors.OpPrereqError("If ip=pool, parameter network"
12761 errors.ECODE_INVAL)
12763 if not netutils.IPAddress.IsValid(ip):
12764 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12765 errors.ECODE_INVAL)
12767 if constants.INIC_MAC in params:
12768 macaddr = params[constants.INIC_MAC]
12769 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12770 macaddr = utils.NormalizeAndValidateMac(macaddr)
12772 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12773 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12774 " modifying an existing NIC",
12775 errors.ECODE_INVAL)
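# A hedged example of NIC modifications passing the checks above ("mynet" is a
# made-up network name):
#
#   (constants.DDM_ADD, -1, {constants.INIC_MAC: constants.VALUE_AUTO,
#                            constants.INIC_IP: constants.NIC_IP_POOL,
#                            constants.INIC_NETWORK: "mynet"})
#   (constants.DDM_MODIFY, 0, {constants.INIC_IP: "192.0.2.10"})
#
# ip=pool without a network would be rejected, as would mac=auto on an
# existing NIC.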
12777 def CheckArguments(self):
12778 if not (self.op.nics or self.op.disks or self.op.disk_template or
12779 self.op.hvparams or self.op.beparams or self.op.os_name or
12780 self.op.offline is not None or self.op.runtime_mem):
12781 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12783 if self.op.hvparams:
12784 _CheckGlobalHvParams(self.op.hvparams)
12786 self.op.disks = self._UpgradeDiskNicMods(
12787 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12788 self.op.nics = self._UpgradeDiskNicMods(
12789 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12791 # Check disk modifications
12792 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12793 self._VerifyDiskModification)
12795 if self.op.disks and self.op.disk_template is not None:
12796 raise errors.OpPrereqError("Disk template conversion and other disk"
12797 " changes not supported at the same time",
12798 errors.ECODE_INVAL)
12800 if (self.op.disk_template and
12801 self.op.disk_template in constants.DTS_INT_MIRROR and
12802 self.op.remote_node is None):
12803 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12804 " one requires specifying a secondary node",
12805 errors.ECODE_INVAL)
12807 # Check NIC modifications
12808 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12809 self._VerifyNicModification)
12811 def ExpandNames(self):
12812 self._ExpandAndLockInstance()
12813 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12814 # Can't even acquire node locks in shared mode as upcoming changes in
12815 # Ganeti 2.6 will start to modify the node object on disk conversion
12816 self.needed_locks[locking.LEVEL_NODE] = []
12817 self.needed_locks[locking.LEVEL_NODE_RES] = []
12818 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12819 # Lock the node group in order to look up the ipolicy
12820 self.share_locks[locking.LEVEL_NODEGROUP] = 1
12822 def DeclareLocks(self, level):
12823 if level == locking.LEVEL_NODEGROUP:
12824 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12825 # Acquire locks for the instance's nodegroups optimistically. Needs
12826 # to be verified in CheckPrereq
12827 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12828 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12829 elif level == locking.LEVEL_NODE:
12830 self._LockInstancesNodes()
12831 if self.op.disk_template and self.op.remote_node:
12832 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12833 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12834 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12836 self.needed_locks[locking.LEVEL_NODE_RES] = \
12837 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12839 def BuildHooksEnv(self):
12840 """Build hooks env.
12842 This runs on the master, primary and secondaries.
12846 if constants.BE_MINMEM in self.be_new:
12847 args["minmem"] = self.be_new[constants.BE_MINMEM]
12848 if constants.BE_MAXMEM in self.be_new:
12849 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12850 if constants.BE_VCPUS in self.be_new:
12851 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12852 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12853 # information at all.
12855 if self._new_nics is not None:
12858 for nic in self._new_nics:
12859 n = copy.deepcopy(nic)
12860 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12861 n.nicparams = nicparams
12862 nics.append(_NICToTuple(self, n))
12864 args["nics"] = nics
12866 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12867 if self.op.disk_template:
12868 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12869 if self.op.runtime_mem:
12870 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12874 def BuildHooksNodes(self):
12875 """Build hooks nodes.
12878 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12881 def _PrepareNicModification(self, params, private, old_ip, old_net,
12882 old_params, cluster, pnode):
12884 update_params_dict = dict([(key, params[key])
12885 for key in constants.NICS_PARAMETERS
12888 req_link = update_params_dict.get(constants.NIC_LINK, None)
12889 req_mode = update_params_dict.get(constants.NIC_MODE, None)
12891 new_net = params.get(constants.INIC_NETWORK, old_net)
12892 if new_net is not None:
12893 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
12894 if netparams is None:
12895 raise errors.OpPrereqError("No netparams found for the network"
12896 " %s, propably not connected." % new_net,
12897 errors.ECODE_INVAL)
12898 new_params = dict(netparams)
12900 new_params = _GetUpdatedParams(old_params, update_params_dict)
12902 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12904 new_filled_params = cluster.SimpleFillNIC(new_params)
12905 objects.NIC.CheckParameterSyntax(new_filled_params)
12907 new_mode = new_filled_params[constants.NIC_MODE]
12908 if new_mode == constants.NIC_MODE_BRIDGED:
12909 bridge = new_filled_params[constants.NIC_LINK]
12910 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12912 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12914 self.warn.append(msg)
12916 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12918 elif new_mode == constants.NIC_MODE_ROUTED:
12919 ip = params.get(constants.INIC_IP, old_ip)
12921 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12922 " on a routed NIC", errors.ECODE_INVAL)
12924 if constants.INIC_MAC in params:
12925 mac = params[constants.INIC_MAC]
12927 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12928 errors.ECODE_INVAL)
12929 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12930 # otherwise generate the MAC address
12931 params[constants.INIC_MAC] = \
12932 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12934 # or validate/reserve the current one
12936 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12937 except errors.ReservationError:
12938 raise errors.OpPrereqError("MAC address '%s' already in use"
12939 " in cluster" % mac,
12940 errors.ECODE_NOTUNIQUE)
12941 elif new_net != old_net:
12943 def get_net_prefix(net):
12945 uuid = self.cfg.LookupNetwork(net)
12947 nobj = self.cfg.GetNetwork(uuid)
12948 return nobj.mac_prefix
12951 new_prefix = get_net_prefix(new_net)
12952 old_prefix = get_net_prefix(old_net)
12953 if old_prefix != new_prefix:
12954 params[constants.INIC_MAC] = \
12955 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12957 # If there is a change in the NIC's (ip, network) configuration
12958 new_ip = params.get(constants.INIC_IP, old_ip)
12959 if (new_ip, new_net) != (old_ip, old_net):
12962 if new_ip.lower() == constants.NIC_IP_POOL:
12964 new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
12965 except errors.ReservationError:
12966 raise errors.OpPrereqError("Unable to get a free IP"
12967 " from the address pool",
12968 errors.ECODE_STATE)
12969 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
12970 params[constants.INIC_IP] = new_ip
12971 elif new_ip != old_ip or new_net != old_net:
12973 self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
12974 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
12975 except errors.ReservationError:
12976 raise errors.OpPrereqError("IP %s not available in network %s" %
12978 errors.ECODE_NOTUNIQUE)
12979 elif new_ip.lower() == constants.NIC_IP_POOL:
12980 raise errors.OpPrereqError("ip=pool, but no network found",
12981 errors.ECODE_INVAL)
12984 if self.op.conflicts_check:
12985 _CheckForConflictingIp(self, new_ip, pnode)
12990 self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
12991 except errors.AddressPoolError:
12992 logging.warning("Release IP %s not contained in network %s",
12995 # there are no changes in (net, ip) tuple
12996 elif (old_net is not None and
12997 (req_link is not None or req_mode is not None)):
12998 raise errors.OpPrereqError("Not allowed to change link or mode of"
12999 " a NIC that is connected to a network.",
13000 errors.ECODE_INVAL)
13002 private.params = new_params
13003 private.filled = new_filled_params
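# Note: private.params keeps the updated NIC parameters as provided (without
# cluster defaults), while private.filled is the same dict with cluster
# defaults applied via SimpleFillNIC; the NIC objects created or modified
# later store private.filled as their nicparams.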
13005 def CheckPrereq(self):
13006 """Check prerequisites.
13008 This only checks the instance list against the existing names.
13011 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13012 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13014 cluster = self.cluster = self.cfg.GetClusterInfo()
13015 assert self.instance is not None, \
13016 "Cannot retrieve locked instance %s" % self.op.instance_name
13018 pnode = instance.primary_node
13019 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13020 nodelist = list(instance.all_nodes)
13021 pnode_info = self.cfg.GetNodeInfo(pnode)
13022 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13024 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13025 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13026 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13028 # dictionary with instance information after the modification
13031 # Prepare disk/NIC modifications
13032 self.diskmod = PrepareContainerMods(self.op.disks, None)
13033 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13036 if self.op.os_name and not self.op.force:
13037 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13038 self.op.force_variant)
13039 instance_os = self.op.os_name
13041 instance_os = instance.os
13043 assert not (self.op.disk_template and self.op.disks), \
13044 "Can't modify disk template and apply disk changes at the same time"
13046 if self.op.disk_template:
13047 if instance.disk_template == self.op.disk_template:
13048 raise errors.OpPrereqError("Instance already has disk template %s" %
13049 instance.disk_template, errors.ECODE_INVAL)
13051 if (instance.disk_template,
13052 self.op.disk_template) not in self._DISK_CONVERSIONS:
13053 raise errors.OpPrereqError("Unsupported disk template conversion from"
13054 " %s to %s" % (instance.disk_template,
13055 self.op.disk_template),
13056 errors.ECODE_INVAL)
13057 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13058 msg="cannot change disk template")
13059 if self.op.disk_template in constants.DTS_INT_MIRROR:
13060 if self.op.remote_node == pnode:
13061 raise errors.OpPrereqError("Given new secondary node %s is the same"
13062 " as the primary node of the instance" %
13063 self.op.remote_node, errors.ECODE_STATE)
13064 _CheckNodeOnline(self, self.op.remote_node)
13065 _CheckNodeNotDrained(self, self.op.remote_node)
13066 # FIXME: here we assume that the old instance type is DT_PLAIN
13067 assert instance.disk_template == constants.DT_PLAIN
13068 disks = [{constants.IDISK_SIZE: d.size,
13069 constants.IDISK_VG: d.logical_id[0]}
13070 for d in instance.disks]
13071 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13072 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13074 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13075 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13076 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13078 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13079 ignore=self.op.ignore_ipolicy)
13080 if pnode_info.group != snode_info.group:
13081 self.LogWarning("The primary and secondary nodes are in two"
13082 " different node groups; the disk parameters"
13083 " from the first disk's node group will be"
13086 # hvparams processing
13087 if self.op.hvparams:
13088 hv_type = instance.hypervisor
13089 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13090 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13091 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13094 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
13095 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13096 self.hv_proposed = self.hv_new = hv_new # the new actual values
13097 self.hv_inst = i_hvdict # the new dict (without defaults)
13099 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13101 self.hv_new = self.hv_inst = {}
13103 # beparams processing
13104 if self.op.beparams:
13105 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13107 objects.UpgradeBeParams(i_bedict)
13108 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13109 be_new = cluster.SimpleFillBE(i_bedict)
13110 self.be_proposed = self.be_new = be_new # the new actual values
13111 self.be_inst = i_bedict # the new dict (without defaults)
13113 self.be_new = self.be_inst = {}
13114 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13115 be_old = cluster.FillBE(instance)
13117 # CPU param validation -- checking every time a parameter is
13118 # changed to cover all cases where either CPU mask or vcpus have
13120 if (constants.BE_VCPUS in self.be_proposed and
13121 constants.HV_CPU_MASK in self.hv_proposed):
13123 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13124 # Verify mask is consistent with number of vCPUs. Can skip this
13125 # test if only 1 entry in the CPU mask, which means same mask
13126 # is applied to all vCPUs.
13127 if (len(cpu_list) > 1 and
13128 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13129 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13131 (self.be_proposed[constants.BE_VCPUS],
13132 self.hv_proposed[constants.HV_CPU_MASK]),
13133 errors.ECODE_INVAL)
13135 # Only perform this test if a new CPU mask is given
13136 if constants.HV_CPU_MASK in self.hv_new:
13137 # Calculate the largest CPU number requested
13138 max_requested_cpu = max(map(max, cpu_list))
13139 # Check that all of the instance's nodes have enough physical CPUs to
13140 # satisfy the requested CPU mask
13141 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13142 max_requested_cpu + 1, instance.hypervisor)
13144 # osparams processing
13145 if self.op.osparams:
13146 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13147 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13148 self.os_inst = i_osdict # the new dict (without defaults)
13154 #TODO(dynmem): do the appropriate check involving MINMEM
13155 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13156 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13157 mem_check_list = [pnode]
13158 if be_new[constants.BE_AUTO_BALANCE]:
13159 # either we changed auto_balance to yes or it was from before
13160 mem_check_list.extend(instance.secondary_nodes)
13161 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13162 instance.hypervisor)
13163 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13164 [instance.hypervisor])
13165 pninfo = nodeinfo[pnode]
13166 msg = pninfo.fail_msg
13168 # Assume the primary node is unreachable and go ahead
13169 self.warn.append("Can't get info from primary node %s: %s" %
13172 (_, _, (pnhvinfo, )) = pninfo.payload
13173 if not isinstance(pnhvinfo.get("memory_free", None), int):
13174 self.warn.append("Node data from primary node %s doesn't contain"
13175 " free memory information" % pnode)
13176 elif instance_info.fail_msg:
13177 self.warn.append("Can't get instance runtime information: %s" %
13178 instance_info.fail_msg)
13180 if instance_info.payload:
13181 current_mem = int(instance_info.payload["memory"])
13183 # Assume instance not running
13184 # (there is a slight race condition here, but it's not very
13185 # probable, and we have no other way to check)
13186 # TODO: Describe race condition
13188 #TODO(dynmem): do the appropriate check involving MINMEM
13189 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13190 pnhvinfo["memory_free"])
13192 raise errors.OpPrereqError("This change will prevent the instance"
13193 " from starting, due to %d MB of memory"
13194 " missing on its primary node" %
13195 miss_mem, errors.ECODE_NORES)
13197 if be_new[constants.BE_AUTO_BALANCE]:
13198 for node, nres in nodeinfo.items():
13199 if node not in instance.secondary_nodes:
13201 nres.Raise("Can't get info from secondary node %s" % node,
13202 prereq=True, ecode=errors.ECODE_STATE)
13203 (_, _, (nhvinfo, )) = nres.payload
13204 if not isinstance(nhvinfo.get("memory_free", None), int):
13205 raise errors.OpPrereqError("Secondary node %s didn't return free"
13206 " memory information" % node,
13207 errors.ECODE_STATE)
13208 #TODO(dynmem): do the appropriate check involving MINMEM
13209 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13210 raise errors.OpPrereqError("This change will prevent the instance"
13211 " from failover to its secondary node"
13212 " %s, due to not enough memory" % node,
13213 errors.ECODE_STATE)
13215 if self.op.runtime_mem:
13216 remote_info = self.rpc.call_instance_info(instance.primary_node,
13218 instance.hypervisor)
13219 remote_info.Raise("Error checking node %s" % instance.primary_node)
13220 if not remote_info.payload: # not running already
13221 raise errors.OpPrereqError("Instance %s is not running" %
13222 instance.name, errors.ECODE_STATE)
13224 current_memory = remote_info.payload["memory"]
13225 if (not self.op.force and
13226 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13227 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13228 raise errors.OpPrereqError("Instance %s must have memory between %d"
13229 " and %d MB of memory unless --force is"
13232 self.be_proposed[constants.BE_MINMEM],
13233 self.be_proposed[constants.BE_MAXMEM]),
13234 errors.ECODE_INVAL)
13236 delta = self.op.runtime_mem - current_memory
13238 _CheckNodeFreeMemory(self, instance.primary_node,
13239 "ballooning memory for instance %s" %
13240 instance.name, delta, instance.hypervisor)
13242 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13243 raise errors.OpPrereqError("Disk operations not supported for"
13244 " diskless instances", errors.ECODE_INVAL)
13246 def _PrepareNicCreate(_, params, private):
13247 self._PrepareNicModification(params, private, None, None,
13248 {}, cluster, pnode)
13249 return (None, None)
13251 def _PrepareNicMod(_, nic, params, private):
13252 self._PrepareNicModification(params, private, nic.ip, nic.network,
13253 nic.nicparams, cluster, pnode)
13256 def _PrepareNicRemove(_, params, __):
13258 net = params.network
13259 if net is not None and ip is not None:
13260 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13262 # Verify NIC changes (operating on copy)
13263 nics = instance.nics[:]
13264 ApplyContainerMods("NIC", nics, None, self.nicmod,
13265 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13266 if len(nics) > constants.MAX_NICS:
13267 raise errors.OpPrereqError("Instance has too many network interfaces"
13268 " (%d), cannot add more" % constants.MAX_NICS,
13269 errors.ECODE_STATE)
13271 # Verify disk changes (operating on a copy)
13272 disks = instance.disks[:]
13273 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13274 if len(disks) > constants.MAX_DISKS:
13275 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13276 " more" % constants.MAX_DISKS,
13277 errors.ECODE_STATE)
13278 disk_sizes = [disk.size for disk in instance.disks]
13279 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13281 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13282 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13284 if self.op.offline is not None:
13285 if self.op.offline:
13286 msg = "can't change to offline"
13288 msg = "can't change to online"
13289 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13291 # Pre-compute NIC changes (necessary to use result in hooks)
13292 self._nic_chgdesc = []
13294 # Operate on copies as this is still in prereq
13295 nics = [nic.Copy() for nic in instance.nics]
13296 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13297 self._CreateNewNic, self._ApplyNicMods, None)
13298 self._new_nics = nics
13299 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13301 self._new_nics = None
13302 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13304 if not self.op.ignore_ipolicy:
13305 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13308 # Fill ispec with backend parameters
13309 ispec[constants.ISPEC_SPINDLE_USE] = \
13310 self.be_new.get(constants.BE_SPINDLE_USE, None)
13311 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13314 # Copy ispec to verify parameters with min/max values separately
13315 ispec_max = ispec.copy()
13316 ispec_max[constants.ISPEC_MEM_SIZE] = \
13317 self.be_new.get(constants.BE_MAXMEM, None)
13318 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13319 ispec_min = ispec.copy()
13320 ispec_min[constants.ISPEC_MEM_SIZE] = \
13321 self.be_new.get(constants.BE_MINMEM, None)
13322 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13324 if (res_max or res_min):
13325 # FIXME: Improve error message by including information about whether
13326 # the upper or lower limit of the parameter fails the ipolicy.
13327 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13328 (group_info, group_info.name,
13329 utils.CommaJoin(set(res_max + res_min))))
13330 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13332 def _ConvertPlainToDrbd(self, feedback_fn):
13333 """Converts an instance from plain to drbd.
13336 feedback_fn("Converting template to drbd")
13337 instance = self.instance
13338 pnode = instance.primary_node
13339 snode = self.op.remote_node
13341 assert instance.disk_template == constants.DT_PLAIN
13343 # create a fake disk info for _GenerateDiskTemplate
13344 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13345 constants.IDISK_VG: d.logical_id[0]}
13346 for d in instance.disks]
13347 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13348 instance.name, pnode, [snode],
13349 disk_info, None, None, 0, feedback_fn,
13351 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13353 info = _GetInstanceInfoText(instance)
13354 feedback_fn("Creating additional volumes...")
13355 # first, create the missing data and meta devices
13356 for disk in anno_disks:
13357 # unfortunately this is... not too nice
13358 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13360 for child in disk.children:
13361 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13362 # at this stage, all new LVs have been created, we can rename the
13364 feedback_fn("Renaming original volumes...")
13365 rename_list = [(o, n.children[0].logical_id)
13366 for (o, n) in zip(instance.disks, new_disks)]
13367 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13368 result.Raise("Failed to rename original LVs")
13370 feedback_fn("Initializing DRBD devices...")
13371 # all child devices are in place, we can now create the DRBD devices
13372 for disk in anno_disks:
13373 for node in [pnode, snode]:
13374 f_create = node == pnode
13375 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13377 # at this point, the instance has been modified
13378 instance.disk_template = constants.DT_DRBD8
13379 instance.disks = new_disks
13380 self.cfg.Update(instance, feedback_fn)
13382 # Release node locks while waiting for sync
13383 _ReleaseLocks(self, locking.LEVEL_NODE)
13385 # disks are created, waiting for sync
13386 disk_abort = not _WaitForSync(self, instance,
13387 oneshot=not self.op.wait_for_sync)
13389 raise errors.OpExecError("There are some degraded disks for"
13390 " this instance, please cleanup manually")
13392 # Node resource locks will be released by caller
13394 def _ConvertDrbdToPlain(self, feedback_fn):
13395 """Converts an instance from drbd to plain.
13398 instance = self.instance
13400 assert len(instance.secondary_nodes) == 1
13401 assert instance.disk_template == constants.DT_DRBD8
13403 pnode = instance.primary_node
13404 snode = instance.secondary_nodes[0]
13405 feedback_fn("Converting template to plain")
13407 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13408 new_disks = [d.children[0] for d in instance.disks]
13410 # copy over size and mode
13411 for parent, child in zip(old_disks, new_disks):
13412 child.size = parent.size
13413 child.mode = parent.mode
13415 # this is a DRBD disk, return its port to the pool
13416 # NOTE: this must be done right before the call to cfg.Update!
13417 for disk in old_disks:
13418 tcp_port = disk.logical_id[2]
13419 self.cfg.AddTcpUdpPort(tcp_port)
13421 # update instance structure
13422 instance.disks = new_disks
13423 instance.disk_template = constants.DT_PLAIN
13424 self.cfg.Update(instance, feedback_fn)
13426 # Release locks in case removing disks takes a while
13427 _ReleaseLocks(self, locking.LEVEL_NODE)
13429 feedback_fn("Removing volumes on the secondary node...")
13430 for disk in old_disks:
13431 self.cfg.SetDiskID(disk, snode)
13432 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13434 self.LogWarning("Could not remove block device %s on node %s,"
13435 " continuing anyway: %s", disk.iv_name, snode, msg)
13437 feedback_fn("Removing unneeded volumes on the primary node...")
13438 for idx, disk in enumerate(old_disks):
13439 meta = disk.children[1]
13440 self.cfg.SetDiskID(meta, pnode)
13441 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13443 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13444 " continuing anyway: %s", idx, pnode, msg)
13446 def _CreateNewDisk(self, idx, params, _):
13447 """Creates a new disk.
13450 instance = self.instance
13453 if instance.disk_template in constants.DTS_FILEBASED:
13454 (file_driver, file_path) = instance.disks[0].logical_id
13455 file_path = os.path.dirname(file_path)
13457 file_driver = file_path = None
13460 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13461 instance.primary_node, instance.secondary_nodes,
13462 [params], file_path, file_driver, idx,
13463 self.Log, self.diskparams)[0]
13465 info = _GetInstanceInfoText(instance)
13467 logging.info("Creating volume %s for instance %s",
13468 disk.iv_name, instance.name)
13469 # Note: this needs to be kept in sync with _CreateDisks
13471 for node in instance.all_nodes:
13472 f_create = (node == instance.primary_node)
13474 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13475 except errors.OpExecError, err:
13476 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13477 disk.iv_name, disk, node, err)
13480 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13484 def _ModifyDisk(idx, disk, params, _):
13485 """Modifies a disk.
13488 disk.mode = params[constants.IDISK_MODE]
13491 ("disk.mode/%d" % idx, disk.mode),
13494 def _RemoveDisk(self, idx, root, _):
13498 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13499 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13500 self.cfg.SetDiskID(disk, node)
13501 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13503 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13504 " continuing anyway", idx, node, msg)
13506 # if this is a DRBD disk, return its port to the pool
13507 if root.dev_type in constants.LDS_DRBD:
13508 self.cfg.AddTcpUdpPort(root.logical_id[2])
13511 def _CreateNewNic(idx, params, private):
13512 """Creates data structure for a new network interface.
13515 mac = params[constants.INIC_MAC]
13516 ip = params.get(constants.INIC_IP, None)
13517 net = params.get(constants.INIC_NETWORK, None)
13518 #TODO: not private.filled?? can a nic have no nicparams??
13519 nicparams = private.filled
13521 return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
13523 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13524 (mac, ip, private.filled[constants.NIC_MODE],
13525 private.filled[constants.NIC_LINK],
13530 def _ApplyNicMods(idx, nic, params, private):
13531 """Modifies a network interface.
13536 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13538 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13539 setattr(nic, key, params[key])
13542 nic.nicparams = private.filled
13544 for (key, val) in nic.nicparams.items():
13545 changes.append(("nic.%s/%d" % (key, idx), val))
13549 def Exec(self, feedback_fn):
13550 """Modifies an instance.
13552 All parameters take effect only at the next restart of the instance.
13555 # Process here the warnings from CheckPrereq, as we don't have a
13556 # feedback_fn there.
13557 # TODO: Replace with self.LogWarning
13558 for warn in self.warn:
13559 feedback_fn("WARNING: %s" % warn)
13561 assert ((self.op.disk_template is None) ^
13562 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13563 "Not owning any node resource locks"
13566 instance = self.instance
13569 if self.op.runtime_mem:
13570 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13572 self.op.runtime_mem)
13573 rpcres.Raise("Cannot modify instance runtime memory")
13574 result.append(("runtime_memory", self.op.runtime_mem))
13576 # Apply disk changes
13577 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13578 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13579 _UpdateIvNames(0, instance.disks)
13581 if self.op.disk_template:
13583 check_nodes = set(instance.all_nodes)
13584 if self.op.remote_node:
13585 check_nodes.add(self.op.remote_node)
13586 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13587 owned = self.owned_locks(level)
13588 assert not (check_nodes - owned), \
13589 ("Not owning the correct locks, owning %r, expected at least %r" %
13590 (owned, check_nodes))
13592 r_shut = _ShutdownInstanceDisks(self, instance)
13594 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13595 " proceed with disk template conversion")
13596 mode = (instance.disk_template, self.op.disk_template)
13598 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13600 self.cfg.ReleaseDRBDMinors(instance.name)
13602 result.append(("disk_template", self.op.disk_template))
13604 assert instance.disk_template == self.op.disk_template, \
13605 ("Expected disk template '%s', found '%s'" %
13606 (self.op.disk_template, instance.disk_template))
13608 # Release node and resource locks if there are any (they might already have
13609 # been released during disk conversion)
13610 _ReleaseLocks(self, locking.LEVEL_NODE)
13611 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13613 # Apply NIC changes
13614 if self._new_nics is not None:
13615 instance.nics = self._new_nics
13616 result.extend(self._nic_chgdesc)
13619 if self.op.hvparams:
13620 instance.hvparams = self.hv_inst
13621 for key, val in self.op.hvparams.iteritems():
13622 result.append(("hv/%s" % key, val))
13625 if self.op.beparams:
13626 instance.beparams = self.be_inst
13627 for key, val in self.op.beparams.iteritems():
13628 result.append(("be/%s" % key, val))
13631 if self.op.os_name:
13632 instance.os = self.op.os_name
13635 if self.op.osparams:
13636 instance.osparams = self.os_inst
13637 for key, val in self.op.osparams.iteritems():
13638 result.append(("os/%s" % key, val))
13640 if self.op.offline is None:
13643 elif self.op.offline:
13644 # Mark instance as offline
13645 self.cfg.MarkInstanceOffline(instance.name)
13646 result.append(("admin_state", constants.ADMINST_OFFLINE))
13648 # Mark instance as online, but stopped
13649 self.cfg.MarkInstanceDown(instance.name)
13650 result.append(("admin_state", constants.ADMINST_DOWN))
13652 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13654 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13655 self.owned_locks(locking.LEVEL_NODE)), \
13656 "All node locks should have been released by now"
13660 _DISK_CONVERSIONS = {
13661 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13662 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
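# The conversion handlers above are dispatched on the (old_template,
# new_template) pair, mirroring Exec:
#   mode = (instance.disk_template, self.op.disk_template)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)
# Any pair not listed here is rejected in CheckPrereq as an unsupported
# conversion.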
13666 class LUInstanceChangeGroup(LogicalUnit):
13667 HPATH = "instance-change-group"
13668 HTYPE = constants.HTYPE_INSTANCE
13671 def ExpandNames(self):
13672 self.share_locks = _ShareAll()
13673 self.needed_locks = {
13674 locking.LEVEL_NODEGROUP: [],
13675 locking.LEVEL_NODE: [],
13678 self._ExpandAndLockInstance()
13680 if self.op.target_groups:
13681 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13682 self.op.target_groups)
13684 self.req_target_uuids = None
13686 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13688 def DeclareLocks(self, level):
13689 if level == locking.LEVEL_NODEGROUP:
13690 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13692 if self.req_target_uuids:
13693 lock_groups = set(self.req_target_uuids)
13695 # Lock all groups used by instance optimistically; this requires going
13696 # via the node before it's locked, requiring verification later on
13697 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13698 lock_groups.update(instance_groups)
13700 # No target groups, need to lock all of them
13701 lock_groups = locking.ALL_SET
13703 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13705 elif level == locking.LEVEL_NODE:
13706 if self.req_target_uuids:
13707 # Lock all nodes used by instances
13708 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13709 self._LockInstancesNodes()
13711 # Lock all nodes in all potential target groups
13712 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13713 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13714 member_nodes = [node_name
13715 for group in lock_groups
13716 for node_name in self.cfg.GetNodeGroup(group).members]
13717 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13719 # Lock all nodes as all groups are potential targets
13720 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13722 def CheckPrereq(self):
13723 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13724 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13725 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13727 assert (self.req_target_uuids is None or
13728 owned_groups.issuperset(self.req_target_uuids))
13729 assert owned_instances == set([self.op.instance_name])
13731 # Get instance information
13732 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13734 # Check if node groups for locked instance are still correct
13735 assert owned_nodes.issuperset(self.instance.all_nodes), \
13736 ("Instance %s's nodes changed while we kept the lock" %
13737 self.op.instance_name)
13739 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13742 if self.req_target_uuids:
13743 # User requested specific target groups
13744 self.target_uuids = frozenset(self.req_target_uuids)
13746 # All groups except those used by the instance are potential targets
13747 self.target_uuids = owned_groups - inst_groups
13749 conflicting_groups = self.target_uuids & inst_groups
13750 if conflicting_groups:
13751 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13752 " used by the instance '%s'" %
13753 (utils.CommaJoin(conflicting_groups),
13754 self.op.instance_name),
13755 errors.ECODE_INVAL)
13757 if not self.target_uuids:
13758 raise errors.OpPrereqError("There are no possible target groups",
13759 errors.ECODE_INVAL)
13761 def BuildHooksEnv(self):
13762 """Build hooks env.
13765 assert self.target_uuids
13768 "TARGET_GROUPS": " ".join(self.target_uuids),
13771 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13775 def BuildHooksNodes(self):
13776 """Build hooks nodes.
13779 mn = self.cfg.GetMasterNode()
13780 return ([mn], [mn])
13782 def Exec(self, feedback_fn):
13783 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13785 assert instances == [self.op.instance_name], "Instance not locked"
13787 req = iallocator.IAReqGroupChange(instances=instances,
13788 target_groups=list(self.target_uuids))
13789 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13791 ial.Run(self.op.iallocator)
13793 if not ial.success:
13794 raise errors.OpPrereqError("Can't compute solution for changing group of"
13795 " instance '%s' using iallocator '%s': %s" %
13796 (self.op.instance_name, self.op.iallocator,
13797 ial.info), errors.ECODE_NORES)
13799 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13801 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13802 " instance '%s'", len(jobs), self.op.instance_name)
13804 return ResultWithJobs(jobs)
13807 class LUBackupQuery(NoHooksLU):
13808 """Query the exports list
13813 def CheckArguments(self):
13814 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13815 ["node", "export"], self.op.use_locking)
13817 def ExpandNames(self):
13818 self.expq.ExpandNames(self)
13820 def DeclareLocks(self, level):
13821 self.expq.DeclareLocks(self, level)
13823 def Exec(self, feedback_fn):
13826 for (node, expname) in self.expq.OldStyleQuery(self):
13827 if expname is None:
13828 result[node] = False
13830 result.setdefault(node, []).append(expname)
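# The old-style result therefore maps each node name either to False (the
# export list could not be retrieved from that node) or to the list of export
# names found there, e.g. (hypothetical names):
#   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}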
13835 class _ExportQuery(_QueryBase):
13836 FIELDS = query.EXPORT_FIELDS
13838 #: The node name is not a unique key for this query
13839 SORT_FIELD = "node"
13841 def ExpandNames(self, lu):
13842 lu.needed_locks = {}
13844 # The following variables interact with _QueryBase._GetNames
13846 self.wanted = _GetWantedNodes(lu, self.names)
13848 self.wanted = locking.ALL_SET
13850 self.do_locking = self.use_locking
13852 if self.do_locking:
13853 lu.share_locks = _ShareAll()
13854 lu.needed_locks = {
13855 locking.LEVEL_NODE: self.wanted,
13858 def DeclareLocks(self, lu, level):
13861 def _GetQueryData(self, lu):
13862 """Computes the list of nodes and their attributes.
13865 # Locking is not used
13867 assert not (compat.any(lu.glm.is_owned(level)
13868 for level in locking.LEVELS
13869 if level != locking.LEVEL_CLUSTER) or
13870 self.do_locking or self.use_locking)
13872 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13876 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13878 result.append((node, None))
13880 result.extend((node, expname) for expname in nres.payload)
13885 class LUBackupPrepare(NoHooksLU):
13886 """Prepares an instance for an export and returns useful information.
13891 def ExpandNames(self):
13892 self._ExpandAndLockInstance()
13894 def CheckPrereq(self):
13895 """Check prerequisites.
13898 instance_name = self.op.instance_name
13900 self.instance = self.cfg.GetInstanceInfo(instance_name)
13901 assert self.instance is not None, \
13902 "Cannot retrieve locked instance %s" % self.op.instance_name
13903 _CheckNodeOnline(self, self.instance.primary_node)
13905 self._cds = _GetClusterDomainSecret()
13907 def Exec(self, feedback_fn):
13908 """Prepares an instance for an export.
13911 instance = self.instance
13913 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13914 salt = utils.GenerateSecret(8)
13916 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13917 result = self.rpc.call_x509_cert_create(instance.primary_node,
13918 constants.RIE_CERT_VALIDITY)
13919 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13921 (name, cert_pem) = result.payload
13923 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13927 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13928 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13930 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13936 class LUBackupExport(LogicalUnit):
13937 """Export an instance to an image in the cluster.
13940 HPATH = "instance-export"
13941 HTYPE = constants.HTYPE_INSTANCE
13944 def CheckArguments(self):
13945 """Check the arguments.
13948 self.x509_key_name = self.op.x509_key_name
13949 self.dest_x509_ca_pem = self.op.destination_x509_ca
13951 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13952 if not self.x509_key_name:
13953 raise errors.OpPrereqError("Missing X509 key name for encryption",
13954 errors.ECODE_INVAL)
13956 if not self.dest_x509_ca_pem:
13957 raise errors.OpPrereqError("Missing destination X509 CA",
13958 errors.ECODE_INVAL)
13960 def ExpandNames(self):
13961 self._ExpandAndLockInstance()
13963 # Lock all nodes for local exports
13964 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13965 # FIXME: lock only instance primary and destination node
13967 # Sad but true, for now we have to lock all nodes, as we don't know where
13968 # the previous export might be, and in this LU we search for it and
13969 # remove it from its current node. In the future we could fix this by:
13970 # - making a tasklet to search (share-lock all), then create the
13971 # new one, then one to remove, after
13972 # - removing the removal operation altogether
13973 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13975 def DeclareLocks(self, level):
13976 """Last minute lock declaration."""
13977 # All nodes are locked anyway, so nothing to do here.
13979 def BuildHooksEnv(self):
13980 """Build hooks env.
13982 This will run on the master, primary node and target node.
13986 "EXPORT_MODE": self.op.mode,
13987 "EXPORT_NODE": self.op.target_node,
13988 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13989 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13990 # TODO: Generic function for boolean env variables
13991 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13994 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13998 def BuildHooksNodes(self):
13999 """Build hooks nodes.
14002 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14004 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14005 nl.append(self.op.target_node)
14009 def CheckPrereq(self):
14010 """Check prerequisites.
14012 This checks that the instance and node names are valid.
14015 instance_name = self.op.instance_name
14017 self.instance = self.cfg.GetInstanceInfo(instance_name)
14018 assert self.instance is not None, \
14019 "Cannot retrieve locked instance %s" % self.op.instance_name
14020 _CheckNodeOnline(self, self.instance.primary_node)
14022 if (self.op.remove_instance and
14023 self.instance.admin_state == constants.ADMINST_UP and
14024 not self.op.shutdown):
14025 raise errors.OpPrereqError("Can not remove instance without shutting it"
14026 " down before", errors.ECODE_STATE)
14028 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14029 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14030 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14031 assert self.dst_node is not None
14033 _CheckNodeOnline(self, self.dst_node.name)
14034 _CheckNodeNotDrained(self, self.dst_node.name)
14037 self.dest_disk_info = None
14038 self.dest_x509_ca = None
14040 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14041 self.dst_node = None
14043 if len(self.op.target_node) != len(self.instance.disks):
14044 raise errors.OpPrereqError(("Received destination information for %s"
14045 " disks, but instance %s has %s disks") %
14046 (len(self.op.target_node), instance_name,
14047 len(self.instance.disks)),
14048 errors.ECODE_INVAL)
14050 cds = _GetClusterDomainSecret()
14052 # Check X509 key name
14054 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14055 except (TypeError, ValueError), err:
14056 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14057 errors.ECODE_INVAL)
14059 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14060 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14061 errors.ECODE_INVAL)
14063 # Load and verify CA
14065 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14066 except OpenSSL.crypto.Error, err:
14067 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14068 (err, ), errors.ECODE_INVAL)
14070 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14071 if errcode is not None:
14072 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14073 (msg, ), errors.ECODE_INVAL)
14075 self.dest_x509_ca = cert
14077 # Verify target information
14079 for idx, disk_data in enumerate(self.op.target_node):
14081 (host, port, magic) = \
14082 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14083 except errors.GenericError, err:
14084 raise errors.OpPrereqError("Target info for disk %s: %s" %
14085 (idx, err), errors.ECODE_INVAL)
14087 disk_info.append((host, port, magic))
14089 assert len(disk_info) == len(self.op.target_node)
14090 self.dest_disk_info = disk_info
14093 raise errors.ProgrammerError("Unhandled export mode %r" %
14096 # instance disk type verification
14097 # TODO: Implement export support for file-based disks
14098 for disk in self.instance.disks:
14099 if disk.dev_type == constants.LD_FILE:
14100 raise errors.OpPrereqError("Export not supported for instances with"
14101 " file-based disks", errors.ECODE_INVAL)
14103 def _CleanupExports(self, feedback_fn):
14104 """Removes exports of current instance from all other nodes.
14106 If an instance in a cluster with nodes A..D was exported to node C, its
14107 exports will be removed from the nodes A, B and D.
14110 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14112 nodelist = self.cfg.GetNodeList()
14113 nodelist.remove(self.dst_node.name)
14115 # On one-node clusters nodelist will be empty after the removal;
14116 # if we proceeded, the backup would be removed because OpBackupQuery
14117 # substitutes an empty list with the full cluster node list.
14118 iname = self.instance.name
14120 feedback_fn("Removing old exports for instance %s" % iname)
14121 exportlist = self.rpc.call_export_list(nodelist)
14122 for node in exportlist:
14123 if exportlist[node].fail_msg:
14125 if iname in exportlist[node].payload:
14126 msg = self.rpc.call_export_remove(node, iname).fail_msg
14128 self.LogWarning("Could not remove older export for instance %s"
14129 " on node %s: %s", iname, node, msg)
14131 def Exec(self, feedback_fn):
14132 """Export an instance to an image in the cluster.
14135 assert self.op.mode in constants.EXPORT_MODES
14137 instance = self.instance
14138 src_node = instance.primary_node
14140 if self.op.shutdown:
14141 # shutdown the instance, but not the disks
14142 feedback_fn("Shutting down instance %s" % instance.name)
14143 result = self.rpc.call_instance_shutdown(src_node, instance,
14144 self.op.shutdown_timeout)
14145 # TODO: Maybe ignore failures if ignore_remove_failures is set
14146 result.Raise("Could not shutdown instance %s on"
14147 " node %s" % (instance.name, src_node))
14149 # set the disks ID correctly since call_instance_start needs the
14150 # correct drbd minor to create the symlinks
14151 for disk in instance.disks:
14152 self.cfg.SetDiskID(disk, src_node)
14154 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14157 # Activate the instance disks if we're exporting a stopped instance
14158 feedback_fn("Activating disks for %s" % instance.name)
14159 _StartInstanceDisks(self, instance, None)
14162 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14165 helper.CreateSnapshots()
14167 if (self.op.shutdown and
14168 instance.admin_state == constants.ADMINST_UP and
14169 not self.op.remove_instance):
14170 assert not activate_disks
14171 feedback_fn("Starting instance %s" % instance.name)
14172 result = self.rpc.call_instance_start(src_node,
14173 (instance, None, None), False)
14174 msg = result.fail_msg
14176 feedback_fn("Failed to start instance: %s" % msg)
14177 _ShutdownInstanceDisks(self, instance)
14178 raise errors.OpExecError("Could not start instance: %s" % msg)
14180 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14181 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14182 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14183 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14184 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14186 (key_name, _, _) = self.x509_key_name
14189 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14192 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14193 key_name, dest_ca_pem,
14198 # Check for backwards compatibility
14199 assert len(dresults) == len(instance.disks)
14200 assert compat.all(isinstance(i, bool) for i in dresults), \
14201 "Not all results are boolean: %r" % dresults
14205 feedback_fn("Deactivating disks for %s" % instance.name)
14206 _ShutdownInstanceDisks(self, instance)
14208 if not (compat.all(dresults) and fin_resu):
14211 failures.append("export finalization")
14212 if not compat.all(dresults):
14213 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14215 failures.append("disk export: disk(s) %s" % fdsk)
14217 raise errors.OpExecError("Export failed, errors in %s" %
14218 utils.CommaJoin(failures))
14220 # At this point, the export was successful, we can cleanup/finish
14222 # Remove instance if requested
14223 if self.op.remove_instance:
14224 feedback_fn("Removing instance %s" % instance.name)
14225 _RemoveInstance(self, feedback_fn, instance,
14226 self.op.ignore_remove_failures)
14228 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14229 self._CleanupExports(feedback_fn)
14231 return fin_resu, dresults
14234 class LUBackupRemove(NoHooksLU):
14235 """Remove exports related to the named instance.
14240 def ExpandNames(self):
14241 self.needed_locks = {}
14242 # We need all nodes to be locked in order for RemoveExport to work, but we
14243 # don't need to lock the instance itself, as nothing will happen to it (and
14244 # we can remove exports also for a removed instance)
14245 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14247 def Exec(self, feedback_fn):
14248 """Remove any export.
14251 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14252 # If the instance was not found we'll try with the name that was passed in.
14253 # This will only work if it was an FQDN, though.
14255 if not instance_name:
14257 instance_name = self.op.instance_name
14259 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14260 exportlist = self.rpc.call_export_list(locked_nodes)
14262 for node in exportlist:
14263 msg = exportlist[node].fail_msg
14265 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14267 if instance_name in exportlist[node].payload:
14269 result = self.rpc.call_export_remove(node, instance_name)
14270 msg = result.fail_msg
14272 logging.error("Could not remove export for instance %s"
14273 " on node %s: %s", instance_name, node, msg)
14275 if fqdn_warn and not found:
14276 feedback_fn("Export not found. If trying to remove an export belonging"
14277 " to a deleted instance please use its Fully Qualified"
14281 class LUGroupAdd(LogicalUnit):
14282 """Logical unit for creating node groups.
14285 HPATH = "group-add"
14286 HTYPE = constants.HTYPE_GROUP
14289 def ExpandNames(self):
14290 # We need the new group's UUID here so that we can create and acquire the
14291 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14292 # that it should not check whether the UUID exists in the configuration.
14293 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14294 self.needed_locks = {}
14295 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14297 def CheckPrereq(self):
14298 """Check prerequisites.
14300 This checks that the given group name is not an existing node group
14305 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14306 except errors.OpPrereqError:
14309 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14310 " node group (UUID: %s)" %
14311 (self.op.group_name, existing_uuid),
14312 errors.ECODE_EXISTS)
14314 if self.op.ndparams:
14315 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14317 if self.op.hv_state:
14318 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14320 self.new_hv_state = None
14322 if self.op.disk_state:
14323 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14325 self.new_disk_state = None
14327 if self.op.diskparams:
14328 for templ in constants.DISK_TEMPLATES:
14329 if templ in self.op.diskparams:
14330 utils.ForceDictType(self.op.diskparams[templ],
14331 constants.DISK_DT_TYPES)
14332 self.new_diskparams = self.op.diskparams
14334 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14335 except errors.OpPrereqError, err:
14336 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14337 errors.ECODE_INVAL)
14339 self.new_diskparams = {}
14341 if self.op.ipolicy:
14342 cluster = self.cfg.GetClusterInfo()
14343 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14345 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14346 except errors.ConfigurationError, err:
14347 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14348 errors.ECODE_INVAL)
14350 def BuildHooksEnv(self):
14351 """Build hooks env.
14355 "GROUP_NAME": self.op.group_name,
14358 def BuildHooksNodes(self):
14359 """Build hooks nodes.
14362 mn = self.cfg.GetMasterNode()
14363 return ([mn], [mn])
14365 def Exec(self, feedback_fn):
14366 """Add the node group to the cluster.
14369 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14370 uuid=self.group_uuid,
14371 alloc_policy=self.op.alloc_policy,
14372 ndparams=self.op.ndparams,
14373 diskparams=self.new_diskparams,
14374 ipolicy=self.op.ipolicy,
14375 hv_state_static=self.new_hv_state,
14376 disk_state_static=self.new_disk_state)
14378 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14379 del self.remove_locks[locking.LEVEL_NODEGROUP]
14382 class LUGroupAssignNodes(NoHooksLU):
14383 """Logical unit for assigning nodes to groups.
14388 def ExpandNames(self):
14389 # These raise errors.OpPrereqError on their own:
14390 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14391 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14393 # We want to lock all the affected nodes and groups. We have readily
14394 # available the list of nodes, and the *destination* group. To gather the
14395 # list of "source" groups, we need to fetch node information later on.
14396 self.needed_locks = {
14397 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14398 locking.LEVEL_NODE: self.op.nodes,
14401 def DeclareLocks(self, level):
14402 if level == locking.LEVEL_NODEGROUP:
14403 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14405 # Try to get all affected nodes' groups without having the group or node
14406 # lock yet. Needs verification later in the code flow.
14407 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14409 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14411 def CheckPrereq(self):
14412 """Check prerequisites.
14415 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14416 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14417 frozenset(self.op.nodes))
14419 expected_locks = (set([self.group_uuid]) |
14420 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14421 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14422 if actual_locks != expected_locks:
14423 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14424 " current groups are '%s', used to be '%s'" %
14425 (utils.CommaJoin(expected_locks),
14426 utils.CommaJoin(actual_locks)))
14428 self.node_data = self.cfg.GetAllNodesInfo()
14429 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14430 instance_data = self.cfg.GetAllInstancesInfo()
14432 if self.group is None:
14433 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14434 (self.op.group_name, self.group_uuid))
14436 (new_splits, previous_splits) = \
14437 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14438 for node in self.op.nodes],
14439 self.node_data, instance_data)
14442 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14444 if not self.op.force:
14445 raise errors.OpExecError("The following instances get split by this"
14446 " change and --force was not given: %s" %
14449 self.LogWarning("This operation will split the following instances: %s",
14452 if previous_splits:
14453 self.LogWarning("In addition, these already-split instances continue"
14454 " to be split across groups: %s",
14455 utils.CommaJoin(utils.NiceSort(previous_splits)))
14457 def Exec(self, feedback_fn):
14458 """Assign nodes to a new group.
14461 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14463 self.cfg.AssignGroupNodes(mods)
14466 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14467 """Check for split instances after a node assignment.
14469 This method considers a series of node assignments as an atomic operation,
14470 and returns information about split instances after applying the set of
14473 In particular, it returns information about newly split instances, and
14474 instances that were already split, and remain so after the change.
14476 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14479 @type changes: list of (node_name, new_group_uuid) pairs.
14480 @param changes: list of node assignments to consider.
14481 @param node_data: a dict with data for all nodes
14482 @param instance_data: a dict with all instances to consider
14483 @rtype: a two-tuple
14484 @return: a list of instances that were previously okay and end up split as a
14485 consequence of this change, and a list of instances that were previously
14486 split and remain split after this change.
14489 changed_nodes = dict((node, group) for node, group in changes
14490 if node_data[node].group != group)
14492 all_split_instances = set()
14493 previously_split_instances = set()
14495 def InstanceNodes(instance):
14496 return [instance.primary_node] + list(instance.secondary_nodes)
14498 for inst in instance_data.values():
14499 if inst.disk_template not in constants.DTS_INT_MIRROR:
14502 instance_nodes = InstanceNodes(inst)
14504 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14505 previously_split_instances.add(inst.name)
14507 if len(set(changed_nodes.get(node, node_data[node].group)
14508 for node in instance_nodes)) > 1:
14509 all_split_instances.add(inst.name)
14511 return (list(all_split_instances - previously_split_instances),
14512 list(previously_split_instances & all_split_instances))
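# Illustrative sketch (not part of any LU, node and group names invented): the
# split check above reduces to comparing the set of groups spanned by an
# instance's nodes before and after the proposed assignment.
def _ExampleSplitCheck():
  node_group = {"node1": "g1", "node2": "g1", "node3": "g2"}
  changes = {"node2": "g2"}  # proposed reassignment of node2 to group g2
  inst_nodes = ["node1", "node2"]  # primary plus secondary nodes of one instance
  was_split = len(set(node_group[n] for n in inst_nodes)) > 1
  is_split = len(set(changes.get(n, node_group[n]) for n in inst_nodes)) > 1
  # Here the instance was whole (both nodes in g1) and becomes split
  return (was_split, is_split)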
14515 class _GroupQuery(_QueryBase):
14516 FIELDS = query.GROUP_FIELDS
14518 def ExpandNames(self, lu):
14519 lu.needed_locks = {}
14521 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14522 self._cluster = lu.cfg.GetClusterInfo()
14523 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14526 self.wanted = [name_to_uuid[name]
14527 for name in utils.NiceSort(name_to_uuid.keys())]
14529 # Entries may be given either as names or as UUIDs.
14532 all_uuid = frozenset(self._all_groups.keys())
14534 for name in self.names:
14535 if name in all_uuid:
14536 self.wanted.append(name)
14537 elif name in name_to_uuid:
14538 self.wanted.append(name_to_uuid[name])
14540 missing.append(name)
14543 raise errors.OpPrereqError("Some groups do not exist: %s" %
14544 utils.CommaJoin(missing),
14545 errors.ECODE_NOENT)
14547 def DeclareLocks(self, lu, level):
14550 def _GetQueryData(self, lu):
14551 """Computes the list of node groups and their attributes.
14554 do_nodes = query.GQ_NODE in self.requested_data
14555 do_instances = query.GQ_INST in self.requested_data
14557 group_to_nodes = None
14558 group_to_instances = None
14560 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14561 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14562 # latter GetAllInstancesInfo() is not enough, for we have to go through
14563 # instance->node. Hence, we will need to process nodes even if we only need
14564 # instance information.
14565 if do_nodes or do_instances:
14566 all_nodes = lu.cfg.GetAllNodesInfo()
14567 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14570 for node in all_nodes.values():
14571 if node.group in group_to_nodes:
14572 group_to_nodes[node.group].append(node.name)
14573 node_to_group[node.name] = node.group
14576 all_instances = lu.cfg.GetAllInstancesInfo()
14577 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14579 for instance in all_instances.values():
14580 node = instance.primary_node
14581 if node in node_to_group:
14582 group_to_instances[node_to_group[node]].append(instance.name)
14585 # Do not pass on node information if it was not requested.
14586 group_to_nodes = None
14588 return query.GroupQueryData(self._cluster,
14589 [self._all_groups[uuid]
14590 for uuid in self.wanted],
14591 group_to_nodes, group_to_instances,
14592 query.GQ_DISKPARAMS in self.requested_data)
14595 class LUGroupQuery(NoHooksLU):
14596 """Logical unit for querying node groups.
14601 def CheckArguments(self):
14602 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14603 self.op.output_fields, False)
14605 def ExpandNames(self):
14606 self.gq.ExpandNames(self)
14608 def DeclareLocks(self, level):
14609 self.gq.DeclareLocks(self, level)
14611 def Exec(self, feedback_fn):
14612 return self.gq.OldStyleQuery(self)
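# A minimal sketch (with hypothetical input) of the name-or-UUID resolution
# performed by _GroupQuery.ExpandNames above: requested entries may be either
# group names or UUIDs, and unknown entries are collected for error reporting.
def _ExampleResolveGroupNames(names, name_to_uuid):
  all_uuids = frozenset(name_to_uuid.values())
  wanted = []
  missing = []
  for name in names:
    if name in all_uuids:
      wanted.append(name)
    elif name in name_to_uuid:
      wanted.append(name_to_uuid[name])
    else:
      missing.append(name)
  return (wanted, missing)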
14615 class LUGroupSetParams(LogicalUnit):
14616 """Modifies the parameters of a node group.
14619 HPATH = "group-modify"
14620 HTYPE = constants.HTYPE_GROUP
14623 def CheckArguments(self):
14626 self.op.diskparams,
14627 self.op.alloc_policy,
14629 self.op.disk_state,
14633 if all_changes.count(None) == len(all_changes):
14634 raise errors.OpPrereqError("Please pass at least one modification",
14635 errors.ECODE_INVAL)
14637 def ExpandNames(self):
14638 # This raises errors.OpPrereqError on its own:
14639 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14641 self.needed_locks = {
14642 locking.LEVEL_INSTANCE: [],
14643 locking.LEVEL_NODEGROUP: [self.group_uuid],
14646 self.share_locks[locking.LEVEL_INSTANCE] = 1
14648 def DeclareLocks(self, level):
14649 if level == locking.LEVEL_INSTANCE:
14650 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14652 # Lock instances optimistically, needs verification once group lock has
14654 self.needed_locks[locking.LEVEL_INSTANCE] = \
14655 self.cfg.GetNodeGroupInstances(self.group_uuid)
14658 def _UpdateAndVerifyDiskParams(old, new):
14659 """Updates and verifies disk parameters.
14662 new_params = _GetUpdatedParams(old, new)
14663 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14666 def CheckPrereq(self):
14667 """Check prerequisites.
14670 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14672 # Check if locked instances are still correct
14673 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14675 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14676 cluster = self.cfg.GetClusterInfo()
14678 if self.group is None:
14679 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14680 (self.op.group_name, self.group_uuid))
14682 if self.op.ndparams:
14683 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14684 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14685 self.new_ndparams = new_ndparams
14687 if self.op.diskparams:
14688 diskparams = self.group.diskparams
14689 uavdp = self._UpdateAndVerifyDiskParams
14690 # For each disk template, update and verify the values of its subdict
14691 new_diskparams = dict((dt,
14692 uavdp(diskparams.get(dt, {}),
14693 self.op.diskparams[dt]))
14694 for dt in constants.DISK_TEMPLATES
14695 if dt in self.op.diskparams)
14696 # Now that all diskparams subdicts are ready, merge the updated
14697 # subdicts into the actual dict
14698 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14700 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14701 except errors.OpPrereqError, err:
14702 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14703 errors.ECODE_INVAL)
14705 if self.op.hv_state:
14706 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14707 self.group.hv_state_static)
14709 if self.op.disk_state:
14710 self.new_disk_state = \
14711 _MergeAndVerifyDiskState(self.op.disk_state,
14712 self.group.disk_state_static)
14714 if self.op.ipolicy:
14715 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14719 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14720 inst_filter = lambda inst: inst.name in owned_instances
14721 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14722 gmi = ganeti.masterd.instance
14724 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14726 new_ipolicy, instances)
14729 self.LogWarning("After the ipolicy change the following instances"
14730 " violate them: %s",
14731 utils.CommaJoin(violations))
14733 def BuildHooksEnv(self):
14734 """Build hooks env.
14738 "GROUP_NAME": self.op.group_name,
14739 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14742 def BuildHooksNodes(self):
14743 """Build hooks nodes.
14746 mn = self.cfg.GetMasterNode()
14747 return ([mn], [mn])
14749 def Exec(self, feedback_fn):
14750 """Modifies the node group.
14755 if self.op.ndparams:
14756 self.group.ndparams = self.new_ndparams
14757 result.append(("ndparams", str(self.group.ndparams)))
14759 if self.op.diskparams:
14760 self.group.diskparams = self.new_diskparams
14761 result.append(("diskparams", str(self.group.diskparams)))
14763 if self.op.alloc_policy:
14764 self.group.alloc_policy = self.op.alloc_policy
14766 if self.op.hv_state:
14767 self.group.hv_state_static = self.new_hv_state
14769 if self.op.disk_state:
14770 self.group.disk_state_static = self.new_disk_state
14772 if self.op.ipolicy:
14773 self.group.ipolicy = self.new_ipolicy
14775 self.cfg.Update(self.group, feedback_fn)
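# Hedged sketch of the per-disk-template parameter merge done in CheckPrereq
# above, using plain dicts instead of the configuration helpers; the template
# name and parameter values are made up for illustration.
def _ExampleMergeDiskParams():
  current = {"drbd": {"resync-rate": 1024, "metavg": "xenvg"}}
  requested = {"drbd": {"resync-rate": 2048}}
  merged = {}
  for template, new_values in requested.items():
    params = dict(current.get(template, {}))
    params.update(new_values)   # requested values override current ones
    merged[template] = params
  # merged == {"drbd": {"resync-rate": 2048, "metavg": "xenvg"}}
  return merged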
14779 class LUGroupRemove(LogicalUnit):
14780 HPATH = "group-remove"
14781 HTYPE = constants.HTYPE_GROUP
14784 def ExpandNames(self):
14785 # This raises errors.OpPrereqError on its own:
14786 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14787 self.needed_locks = {
14788 locking.LEVEL_NODEGROUP: [self.group_uuid],
14791 def CheckPrereq(self):
14792 """Check prerequisites.
14794 This checks that the given group name exists as a node group, that it is
14795 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
14799 # Verify that the group is empty.
14800 group_nodes = [node.name
14801 for node in self.cfg.GetAllNodesInfo().values()
14802 if node.group == self.group_uuid]
14805 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14807 (self.op.group_name,
14808 utils.CommaJoin(utils.NiceSort(group_nodes))),
14809 errors.ECODE_STATE)
14811 # Verify the cluster would not be left group-less.
14812 if len(self.cfg.GetNodeGroupList()) == 1:
14813 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14814 " removed" % self.op.group_name,
14815 errors.ECODE_STATE)
14817 def BuildHooksEnv(self):
14818 """Build hooks env.
14822 "GROUP_NAME": self.op.group_name,
14825 def BuildHooksNodes(self):
14826 """Build hooks nodes.
14829 mn = self.cfg.GetMasterNode()
14830 return ([mn], [mn])
14832 def Exec(self, feedback_fn):
14833 """Remove the node group.
14837 self.cfg.RemoveNodeGroup(self.group_uuid)
14838 except errors.ConfigurationError:
14839 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14840 (self.op.group_name, self.group_uuid))
14842 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14845 class LUGroupRename(LogicalUnit):
14846 HPATH = "group-rename"
14847 HTYPE = constants.HTYPE_GROUP
14850 def ExpandNames(self):
14851 # This raises errors.OpPrereqError on its own:
14852 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14854 self.needed_locks = {
14855 locking.LEVEL_NODEGROUP: [self.group_uuid],
14858 def CheckPrereq(self):
14859 """Check prerequisites.
14861 Ensures requested new name is not yet used.
14865 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14866 except errors.OpPrereqError:
14869 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14870 " node group (UUID: %s)" %
14871 (self.op.new_name, new_name_uuid),
14872 errors.ECODE_EXISTS)
14874 def BuildHooksEnv(self):
14875 """Build hooks env.
14879 "OLD_NAME": self.op.group_name,
14880 "NEW_NAME": self.op.new_name,
14883 def BuildHooksNodes(self):
14884 """Build hooks nodes.
14887 mn = self.cfg.GetMasterNode()
14889 all_nodes = self.cfg.GetAllNodesInfo()
14890 all_nodes.pop(mn, None)
14893 run_nodes.extend(node.name for node in all_nodes.values()
14894 if node.group == self.group_uuid)
14896 return (run_nodes, run_nodes)
14898 def Exec(self, feedback_fn):
14899 """Rename the node group.
14902 group = self.cfg.GetNodeGroup(self.group_uuid)
14905 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14906 (self.op.group_name, self.group_uuid))
14908 group.name = self.op.new_name
14909 self.cfg.Update(group, feedback_fn)
14911 return self.op.new_name
14914 class LUGroupEvacuate(LogicalUnit):
14915 HPATH = "group-evacuate"
14916 HTYPE = constants.HTYPE_GROUP
14919 def ExpandNames(self):
14920 # This raises errors.OpPrereqError on its own:
14921 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14923 if self.op.target_groups:
14924 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14925 self.op.target_groups)
14927 self.req_target_uuids = []
14929 if self.group_uuid in self.req_target_uuids:
14930 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14931 " as a target group (targets are %s)" %
14933 utils.CommaJoin(self.req_target_uuids)),
14934 errors.ECODE_INVAL)
14936 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14938 self.share_locks = _ShareAll()
14939 self.needed_locks = {
14940 locking.LEVEL_INSTANCE: [],
14941 locking.LEVEL_NODEGROUP: [],
14942 locking.LEVEL_NODE: [],
14945 def DeclareLocks(self, level):
14946 if level == locking.LEVEL_INSTANCE:
14947 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14949 # Lock instances optimistically, needs verification once node and group
14950 # locks have been acquired
14951 self.needed_locks[locking.LEVEL_INSTANCE] = \
14952 self.cfg.GetNodeGroupInstances(self.group_uuid)
14954 elif level == locking.LEVEL_NODEGROUP:
14955 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14957 if self.req_target_uuids:
14958 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14960 # Lock all groups used by instances optimistically; this requires going
14961 # via the node before it's locked, requiring verification later on
14962 lock_groups.update(group_uuid
14963 for instance_name in
14964 self.owned_locks(locking.LEVEL_INSTANCE)
14966 self.cfg.GetInstanceNodeGroups(instance_name))
14968 # No target groups, need to lock all of them
14969 lock_groups = locking.ALL_SET
14971 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14973 elif level == locking.LEVEL_NODE:
14974 # This will only lock the nodes in the group to be evacuated which
14975 # contain actual instances
14976 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14977 self._LockInstancesNodes()
14979 # Lock all nodes in group to be evacuated and target groups
14980 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14981 assert self.group_uuid in owned_groups
14982 member_nodes = [node_name
14983 for group in owned_groups
14984 for node_name in self.cfg.GetNodeGroup(group).members]
14985 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14987 def CheckPrereq(self):
14988 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14989 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14990 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14992 assert owned_groups.issuperset(self.req_target_uuids)
14993 assert self.group_uuid in owned_groups
14995 # Check if locked instances are still correct
14996 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14998 # Get instance information
14999 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15001 # Check if node groups for locked instances are still correct
15002 _CheckInstancesNodeGroups(self.cfg, self.instances,
15003 owned_groups, owned_nodes, self.group_uuid)
15005 if self.req_target_uuids:
15006 # User requested specific target groups
15007 self.target_uuids = self.req_target_uuids
15009 # All groups except the one to be evacuated are potential targets
15010 self.target_uuids = [group_uuid for group_uuid in owned_groups
15011 if group_uuid != self.group_uuid]
15013 if not self.target_uuids:
15014 raise errors.OpPrereqError("There are no possible target groups",
15015 errors.ECODE_INVAL)
15017 def BuildHooksEnv(self):
15018 """Build hooks env.
15022 "GROUP_NAME": self.op.group_name,
15023 "TARGET_GROUPS": " ".join(self.target_uuids),
15026 def BuildHooksNodes(self):
15027 """Build hooks nodes.
15030 mn = self.cfg.GetMasterNode()
15032 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15034 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15036 return (run_nodes, run_nodes)
15038 def Exec(self, feedback_fn):
15039 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15041 assert self.group_uuid not in self.target_uuids
15043 req = iallocator.IAReqGroupChange(instances=instances,
15044 target_groups=self.target_uuids)
15045 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15047 ial.Run(self.op.iallocator)
15049 if not ial.success:
15050 raise errors.OpPrereqError("Can't compute group evacuation using"
15051 " iallocator '%s': %s" %
15052 (self.op.iallocator, ial.info),
15053 errors.ECODE_NORES)
15055 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15057 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15058 len(jobs), self.op.group_name)
15060 return ResultWithJobs(jobs)
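# Small illustrative helper (not used above): target selection as done in
# LUGroupEvacuate.CheckPrereq - either the explicitly requested groups, or
# every other owned group except the one being evacuated.
def _ExampleEvacTargets(group_uuid, owned_groups, requested_uuids):
  if requested_uuids:
    return list(requested_uuids)
  return [uuid for uuid in owned_groups if uuid != group_uuid]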
15063 class TagsLU(NoHooksLU): # pylint: disable=W0223
15064 """Generic tags LU.
15066 This is an abstract class which is the parent of all the other tags LUs.
15069 def ExpandNames(self):
15070 self.group_uuid = None
15071 self.needed_locks = {}
15073 if self.op.kind == constants.TAG_NODE:
15074 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15075 lock_level = locking.LEVEL_NODE
15076 lock_name = self.op.name
15077 elif self.op.kind == constants.TAG_INSTANCE:
15078 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15079 lock_level = locking.LEVEL_INSTANCE
15080 lock_name = self.op.name
15081 elif self.op.kind == constants.TAG_NODEGROUP:
15082 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15083 lock_level = locking.LEVEL_NODEGROUP
15084 lock_name = self.group_uuid
15085 elif self.op.kind == constants.TAG_NETWORK:
15086 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15087 lock_level = locking.LEVEL_NETWORK
15088 lock_name = self.network_uuid
15093 if lock_level and getattr(self.op, "use_locking", True):
15094 self.needed_locks[lock_level] = lock_name
15096 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15097 # not possible to acquire the BGL based on opcode parameters)
15099 def CheckPrereq(self):
15100 """Check prerequisites.
15103 if self.op.kind == constants.TAG_CLUSTER:
15104 self.target = self.cfg.GetClusterInfo()
15105 elif self.op.kind == constants.TAG_NODE:
15106 self.target = self.cfg.GetNodeInfo(self.op.name)
15107 elif self.op.kind == constants.TAG_INSTANCE:
15108 self.target = self.cfg.GetInstanceInfo(self.op.name)
15109 elif self.op.kind == constants.TAG_NODEGROUP:
15110 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15111 elif self.op.kind == constants.TAG_NETWORK:
15112 self.target = self.cfg.GetNetwork(self.network_uuid)
15114 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15115 str(self.op.kind), errors.ECODE_INVAL)
15118 class LUTagsGet(TagsLU):
15119 """Returns the tags of a given object.
15124 def ExpandNames(self):
15125 TagsLU.ExpandNames(self)
15127 # Share locks as this is only a read operation
15128 self.share_locks = _ShareAll()
15130 def Exec(self, feedback_fn):
15131 """Returns the tag list.
15134 return list(self.target.GetTags())
15137 class LUTagsSearch(NoHooksLU):
15138 """Searches the tags for a given pattern.
15143 def ExpandNames(self):
15144 self.needed_locks = {}
15146 def CheckPrereq(self):
15147 """Check prerequisites.
15149 This checks the pattern passed for validity by compiling it.
15153 self.re = re.compile(self.op.pattern)
15154 except re.error, err:
15155 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15156 (self.op.pattern, err), errors.ECODE_INVAL)
15158 def Exec(self, feedback_fn):
15159 """Returns the tag list.
15163 tgts = [("/cluster", cfg.GetClusterInfo())]
15164 ilist = cfg.GetAllInstancesInfo().values()
15165 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15166 nlist = cfg.GetAllNodesInfo().values()
15167 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15168 tgts.extend(("/nodegroup/%s" % n.name, n)
15169 for n in cfg.GetAllNodeGroupsInfo().values())
15171 for path, target in tgts:
15172 for tag in target.GetTags():
15173 if self.re.search(tag):
15174 results.append((path, tag))
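# Hedged sketch (hypothetical tag data, not used by the LU above): the search
# walks every taggable object and returns (path, tag) pairs for the tags that
# match the compiled pattern.
def _ExampleTagSearch():
  tgts = [("/instances/web1", frozenset(["env:prod", "owner:ops"])),
          ("/nodes/node1", frozenset(["env:test"]))]
  pattern = re.compile("^env:")
  return [(path, tag)
          for (path, tags) in tgts
          for tag in tags
          if pattern.search(tag)]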
15178 class LUTagsSet(TagsLU):
15179 """Sets a tag on a given object.
15184 def CheckPrereq(self):
15185 """Check prerequisites.
15187 This checks the type and length of the tag name and value.
15190 TagsLU.CheckPrereq(self)
15191 for tag in self.op.tags:
15192 objects.TaggableObject.ValidateTag(tag)
15194 def Exec(self, feedback_fn):
15199 for tag in self.op.tags:
15200 self.target.AddTag(tag)
15201 except errors.TagError, err:
15202 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15203 self.cfg.Update(self.target, feedback_fn)
15206 class LUTagsDel(TagsLU):
15207 """Delete a list of tags from a given object.
15212 def CheckPrereq(self):
15213 """Check prerequisites.
15215 This checks that we have the given tag.
15218 TagsLU.CheckPrereq(self)
15219 for tag in self.op.tags:
15220 objects.TaggableObject.ValidateTag(tag)
15221 del_tags = frozenset(self.op.tags)
15222 cur_tags = self.target.GetTags()
15224 diff_tags = del_tags - cur_tags
15226 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15227 raise errors.OpPrereqError("Tag(s) %s not found" %
15228 (utils.CommaJoin(diff_names), ),
15229 errors.ECODE_NOENT)
15231 def Exec(self, feedback_fn):
15232 """Remove the tag from the object.
15235 for tag in self.op.tags:
15236 self.target.RemoveTag(tag)
15237 self.cfg.Update(self.target, feedback_fn)
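# Minimal sketch of the prerequisite check above with plain sets; the tag
# values are invented for illustration.
def _ExampleMissingTags():
  cur_tags = frozenset(["env:prod", "owner:ops"])
  del_tags = frozenset(["env:prod", "tier:db"])
  # Tags requested for deletion that the object does not actually carry
  return del_tags - cur_tags  # frozenset(["tier:db"])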
15240 class LUTestDelay(NoHooksLU):
15241 """Sleep for a specified amount of time.
15243 This LU sleeps on the master and/or nodes for a specified amount of time.
15249 def ExpandNames(self):
15250 """Expand names and set required locks.
15252 This expands the node list, if any.
15255 self.needed_locks = {}
15256 if self.op.on_nodes:
15257 # _GetWantedNodes can be used here, but is not always appropriate to use
15258 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15259 # more information.
15260 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15261 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15263 def _TestDelay(self):
15264 """Do the actual sleep.
15267 if self.op.on_master:
15268 if not utils.TestDelay(self.op.duration):
15269 raise errors.OpExecError("Error during master delay test")
15270 if self.op.on_nodes:
15271 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15272 for node, node_result in result.items():
15273 node_result.Raise("Failure during rpc call to node %s" % node)
15275 def Exec(self, feedback_fn):
15276 """Execute the test delay opcode, with the wanted repetitions.
15279 if self.op.repeat == 0:
15282 top_value = self.op.repeat - 1
15283 for i in range(self.op.repeat):
15284 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
15288 class LURestrictedCommand(NoHooksLU):
15289 """Logical unit for executing restricted commands.
15294 def ExpandNames(self):
15296 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15298 self.needed_locks = {
15299 locking.LEVEL_NODE: self.op.nodes,
15301 self.share_locks = {
15302 locking.LEVEL_NODE: not self.op.use_locking,
15305 def CheckPrereq(self):
15306 """Check prerequisites.
15310 def Exec(self, feedback_fn):
15311 """Execute restricted command and return output.
15314 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15316 # Check if correct locks are held
15317 assert set(self.op.nodes).issubset(owned_nodes)
15319 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15323 for node_name in self.op.nodes:
15324 nres = rpcres[node_name]
15326 msg = ("Command '%s' on node '%s' failed: %s" %
15327 (self.op.command, node_name, nres.fail_msg))
15328 result.append((False, msg))
15330 result.append((True, nres.payload))
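# Hedged sketch of the result format produced above: one (success, output or
# error message) pair per requested node, in the order given in the opcode.
# The node name and payloads below are fabricated.
_EXAMPLE_RESTRICTED_COMMAND_RESULT = [
  (True, "ok\n"),
  (False, "Command 'uptime' on node 'node2' failed: connection timed out"),
  ]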
15335 class LUTestJqueue(NoHooksLU):
15336 """Utility LU to test some aspects of the job queue.
15341 # Must be lower than default timeout for WaitForJobChange to see whether it
15342 # notices changed jobs
15343 _CLIENT_CONNECT_TIMEOUT = 20.0
15344 _CLIENT_CONFIRM_TIMEOUT = 60.0
15347 def _NotifyUsingSocket(cls, cb, errcls):
15348 """Opens a Unix socket and waits for another program to connect.
15351 @param cb: Callback to send socket name to client
15352 @type errcls: class
15353 @param errcls: Exception class to use for errors
15356 # Using a temporary directory as there's no easy way to create temporary
15357 # sockets without writing a custom loop around tempfile.mktemp and socket.bind.
15359 tmpdir = tempfile.mkdtemp()
15361 tmpsock = utils.PathJoin(tmpdir, "sock")
15363 logging.debug("Creating temporary socket at %s", tmpsock)
15364 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15369 # Send details to client
15372 # Wait for client to connect before continuing
15373 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15375 (conn, _) = sock.accept()
15376 except socket.error, err:
15377 raise errcls("Client didn't connect in time (%s)" % err)
15381 # Remove as soon as client is connected
15382 shutil.rmtree(tmpdir)
15384 # Wait for client to close
15387 # pylint: disable=E1101
15388 # Instance of '_socketobject' has no ... member
15389 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15391 except socket.error, err:
15392 raise errcls("Client failed to confirm notification (%s)" % err)
15396 def _SendNotification(self, test, arg, sockname):
15397 """Sends a notification to the client.
15400 @param test: Test name
15401 @param arg: Test argument (depends on test)
15402 @type sockname: string
15403 @param sockname: Socket path
15406 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15408 def _Notify(self, prereq, test, arg):
15409 """Notifies the client of a test.
15412 @param prereq: Whether this is a prereq-phase test
15414 @param test: Test name
15415 @param arg: Test argument (depends on test)
15419 errcls = errors.OpPrereqError
15421 errcls = errors.OpExecError
15423 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15427 def CheckArguments(self):
15428 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15429 self.expandnames_calls = 0
15431 def ExpandNames(self):
15432 checkargs_calls = getattr(self, "checkargs_calls", 0)
15433 if checkargs_calls < 1:
15434 raise errors.ProgrammerError("CheckArguments was not called")
15436 self.expandnames_calls += 1
15438 if self.op.notify_waitlock:
15439 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15441 self.LogInfo("Expanding names")
15443 # Get lock on master node (just to get a lock, not for a particular reason)
15444 self.needed_locks = {
15445 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15448 def Exec(self, feedback_fn):
15449 if self.expandnames_calls < 1:
15450 raise errors.ProgrammerError("ExpandNames was not called")
15452 if self.op.notify_exec:
15453 self._Notify(False, constants.JQT_EXEC, None)
15455 self.LogInfo("Executing")
15457 if self.op.log_messages:
15458 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15459 for idx, msg in enumerate(self.op.log_messages):
15460 self.LogInfo("Sending log message %s", idx + 1)
15461 feedback_fn(constants.JQT_MSGPREFIX + msg)
15462 # Report how many test messages have been sent
15463 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15466 raise errors.OpExecError("Opcode failure was requested")
15471 class LUTestAllocator(NoHooksLU):
15472 """Run allocator tests.
15474 This LU runs the allocator tests
15477 def CheckPrereq(self):
15478 """Check prerequisites.
15480 This checks the opcode parameters depending on the direction and mode of the test.
15483 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15484 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15485 for attr in ["memory", "disks", "disk_template",
15486 "os", "tags", "nics", "vcpus"]:
15487 if not hasattr(self.op, attr):
15488 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15489 attr, errors.ECODE_INVAL)
15490 iname = self.cfg.ExpandInstanceName(self.op.name)
15491 if iname is not None:
15492 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15493 iname, errors.ECODE_EXISTS)
15494 if not isinstance(self.op.nics, list):
15495 raise errors.OpPrereqError("Invalid parameter 'nics'",
15496 errors.ECODE_INVAL)
15497 if not isinstance(self.op.disks, list):
15498 raise errors.OpPrereqError("Invalid parameter 'disks'",
15499 errors.ECODE_INVAL)
15500 for row in self.op.disks:
15501 if (not isinstance(row, dict) or
15502 constants.IDISK_SIZE not in row or
15503 not isinstance(row[constants.IDISK_SIZE], int) or
15504 constants.IDISK_MODE not in row or
15505 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15506 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15507 " parameter", errors.ECODE_INVAL)
15508 if self.op.hypervisor is None:
15509 self.op.hypervisor = self.cfg.GetHypervisorType()
15510 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15511 fname = _ExpandInstanceName(self.cfg, self.op.name)
15512 self.op.name = fname
15513 self.relocate_from = \
15514 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15515 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15516 constants.IALLOCATOR_MODE_NODE_EVAC):
15517 if not self.op.instances:
15518 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15519 self.op.instances = _GetWantedInstances(self, self.op.instances)
15521 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15522 self.op.mode, errors.ECODE_INVAL)
15524 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15525 if self.op.allocator is None:
15526 raise errors.OpPrereqError("Missing allocator name",
15527 errors.ECODE_INVAL)
15528 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15529 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15530 self.op.direction, errors.ECODE_INVAL)
15532 def Exec(self, feedback_fn):
15533 """Run the allocator test.
15536 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15537 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15538 memory=self.op.memory,
15539 disks=self.op.disks,
15540 disk_template=self.op.disk_template,
15544 vcpus=self.op.vcpus,
15545 spindle_use=self.op.spindle_use,
15546 hypervisor=self.op.hypervisor)
15547 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15548 req = iallocator.IAReqRelocate(name=self.op.name,
15549 relocate_from=list(self.relocate_from))
15550 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15551 req = iallocator.IAReqGroupChange(instances=self.op.instances,
15552 target_groups=self.op.target_groups)
15553 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15554 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15555 evac_mode=self.op.evac_mode)
15556 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15557 disk_template = self.op.disk_template
15558 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15559 memory=self.op.memory,
15560 disks=self.op.disks,
15561 disk_template=disk_template,
15565 vcpus=self.op.vcpus,
15566 spindle_use=self.op.spindle_use,
15567 hypervisor=self.op.hypervisor)
15568 for idx in range(self.op.count)]
15569 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15571 raise errors.ProgrammerError("Unhandled mode %s in"
15572 " LUTestAllocator.Exec", self.op.mode)
15574 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15575 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15576 result = ial.in_text
15578 ial.Run(self.op.allocator, validate=False)
15579 result = ial.out_text
15584 class LUNetworkAdd(LogicalUnit):
15585 """Logical unit for creating networks.
15588 HPATH = "network-add"
15589 HTYPE = constants.HTYPE_NETWORK
15592 def BuildHooksNodes(self):
15593 """Build hooks nodes.
15596 mn = self.cfg.GetMasterNode()
15597 return ([mn], [mn])
15599 def ExpandNames(self):
15600 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15601 self.needed_locks = {}
15602 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15604 def CheckPrereq(self):
15605 """Check prerequisites.
15607 This checks that the requested network is valid and not already defined.
15611 if self.op.network is None:
15612 raise errors.OpPrereqError("Network must be given",
15613 errors.ECODE_INVAL)
15615 uuid = self.cfg.LookupNetwork(self.op.network_name)
15618 raise errors.OpPrereqError("Network '%s' already defined" %
15619 self.op.network, errors.ECODE_EXISTS)
15621 if self.op.mac_prefix:
15622 utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15624 # Check tag validity
15625 for tag in self.op.tags:
15626 objects.TaggableObject.ValidateTag(tag)
15628 def BuildHooksEnv(self):
15629 """Build hooks env.
15633 "name": self.op.network_name,
15634 "subnet": self.op.network,
15635 "gateway": self.op.gateway,
15636 "network6": self.op.network6,
15637 "gateway6": self.op.gateway6,
15638 "mac_prefix": self.op.mac_prefix,
15639 "network_type": self.op.network_type,
15640 "tags": self.op.tags,
15642 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15644 def Exec(self, feedback_fn):
15645 """Add the ip pool to the cluster.
15648 nobj = objects.Network(name=self.op.network_name,
15649 network=self.op.network,
15650 gateway=self.op.gateway,
15651 network6=self.op.network6,
15652 gateway6=self.op.gateway6,
15653 mac_prefix=self.op.mac_prefix,
15654 network_type=self.op.network_type,
15655 uuid=self.network_uuid,
15657 # Initialize the associated address pool
15659 pool = network.AddressPool.InitializeNetwork(nobj)
15660 except errors.AddressPoolError, e:
15661 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15663 # Check if we need to reserve the nodes and the cluster master IP
15664 # These may not be allocated to any instances in routed mode, as
15665 # they wouldn't function anyway.
15666 for node in self.cfg.GetAllNodesInfo().values():
15667 for ip in [node.primary_ip, node.secondary_ip]:
15670 self.LogInfo("Reserved node %s's IP (%s)", node.name, ip)
15672 except errors.AddressPoolError:
15675 master_ip = self.cfg.GetClusterInfo().master_ip
15677 pool.Reserve(master_ip)
15678 self.LogInfo("Reserved cluster master IP (%s)", master_ip)
15679 except errors.AddressPoolError:
15682 if self.op.add_reserved_ips:
15683 for ip in self.op.add_reserved_ips:
15685 pool.Reserve(ip, external=True)
15686 except errors.AddressPoolError, e:
15687 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15690 for tag in self.op.tags:
15693 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15694 del self.remove_locks[locking.LEVEL_NETWORK]
15697 class LUNetworkRemove(LogicalUnit):
15698 HPATH = "network-remove"
15699 HTYPE = constants.HTYPE_NETWORK
15702 def ExpandNames(self):
15703 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15705 if not self.network_uuid:
15706 raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
15707 errors.ECODE_INVAL)
15708 self.needed_locks = {
15709 locking.LEVEL_NETWORK: [self.network_uuid],
15712 def CheckPrereq(self):
15713 """Check prerequisites.
15715 This checks that the given network name exists as a network and that it
15716 is not connected to any node group.
15721 # Verify that the network is not connected.
15722 node_groups = [group.name
15723 for group in self.cfg.GetAllNodeGroupsInfo().values()
15724 for net in group.networks.keys()
15725 if net == self.network_uuid]
15728 self.LogWarning("Network '%s' is connected to the following"
15729 " node groups: %s" % (self.op.network_name,
15730 utils.CommaJoin(utils.NiceSort(node_groups))))
15731 raise errors.OpPrereqError("Network still connected",
15732 errors.ECODE_STATE)
15734 def BuildHooksEnv(self):
15735 """Build hooks env.
15739 "NETWORK_NAME": self.op.network_name,
15742 def BuildHooksNodes(self):
15743 """Build hooks nodes.
15746 mn = self.cfg.GetMasterNode()
15747 return ([mn], [mn])
15749 def Exec(self, feedback_fn):
15750 """Remove the network.
15754 self.cfg.RemoveNetwork(self.network_uuid)
15755 except errors.ConfigurationError:
15756 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15757 (self.op.network_name, self.network_uuid))
15760 class LUNetworkSetParams(LogicalUnit):
15761 """Modifies the parameters of a network.
15764 HPATH = "network-modify"
15765 HTYPE = constants.HTYPE_NETWORK
15768 def CheckArguments(self):
15769 if (self.op.gateway and
15770 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15771 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15772 " at once", errors.ECODE_INVAL)
15774 def ExpandNames(self):
15775 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15776 self.network = self.cfg.GetNetwork(self.network_uuid)
15777 if self.network is None:
15778 raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
15779 (self.op.network_name, self.network_uuid),
15780 errors.ECODE_INVAL)
15781 self.needed_locks = {
15782 locking.LEVEL_NETWORK: [self.network_uuid],
15785 def CheckPrereq(self):
15786 """Check prerequisites.
15789 self.gateway = self.network.gateway
15790 self.network_type = self.network.network_type
15791 self.mac_prefix = self.network.mac_prefix
15792 self.network6 = self.network.network6
15793 self.gateway6 = self.network.gateway6
15794 self.tags = self.network.tags
15796 self.pool = network.AddressPool(self.network)
15798 if self.op.gateway:
15799 if self.op.gateway == constants.VALUE_NONE:
15800 self.gateway = None
15802 self.gateway = self.op.gateway
15803 if self.pool.IsReserved(self.gateway):
15804 raise errors.OpPrereqError("%s is already reserved" %
15805 self.gateway, errors.ECODE_INVAL)
15807 if self.op.network_type:
15808 if self.op.network_type == constants.VALUE_NONE:
15809 self.network_type = None
15811 self.network_type = self.op.network_type
15813 if self.op.mac_prefix:
15814 if self.op.mac_prefix == constants.VALUE_NONE:
15815 self.mac_prefix = None
15817 utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15818 self.mac_prefix = self.op.mac_prefix
15820 if self.op.gateway6:
15821 if self.op.gateway6 == constants.VALUE_NONE:
15822 self.gateway6 = None
15824 self.gateway6 = self.op.gateway6
15826 if self.op.network6:
15827 if self.op.network6 == constants.VALUE_NONE:
15828 self.network6 = None
15830 self.network6 = self.op.network6
15832 def BuildHooksEnv(self):
15833 """Build hooks env.
15837 "name": self.op.network_name,
15838 "subnet": self.network.network,
15839 "gateway": self.gateway,
15840 "network6": self.network6,
15841 "gateway6": self.gateway6,
15842 "mac_prefix": self.mac_prefix,
15843 "network_type": self.network_type,
15846 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15848 def BuildHooksNodes(self):
15849 """Build hooks nodes.
15852 mn = self.cfg.GetMasterNode()
15853 return ([mn], [mn])
15855 def Exec(self, feedback_fn):
15856 """Modifies the network.
15859 #TODO: reserve/release via temporary reservation manager
15860 # extend cfg.ReserveIp/ReleaseIp with the external flag
15861 if self.op.gateway:
15862 if self.gateway == self.network.gateway:
15863 self.LogWarning("Gateway is already %s" % self.gateway)
15866 self.pool.Reserve(self.gateway, external=True)
15867 if self.network.gateway:
15868 self.pool.Release(self.network.gateway, external=True)
15869 self.network.gateway = self.gateway
15871 if self.op.add_reserved_ips:
15872 for ip in self.op.add_reserved_ips:
15874 if self.pool.IsReserved(ip):
15875 self.LogWarning("IP %s is already reserved" % ip)
15877 self.pool.Reserve(ip, external=True)
15878 except errors.AddressPoolError, e:
15879 self.LogWarning("Cannot reserve ip %s. %s" % (ip, e))
15881 if self.op.remove_reserved_ips:
15882 for ip in self.op.remove_reserved_ips:
15883 if ip == self.network.gateway:
15884 self.LogWarning("Cannot unreserve Gateway's IP")
15887 if not self.pool.IsReserved(ip):
15888 self.LogWarning("IP %s is already unreserved" % ip)
15890 self.pool.Release(ip, external=True)
15891 except errors.AddressPoolError, e:
15892 self.LogWarning("Cannot release ip %s. %s" % (ip, e))
15894 if self.op.mac_prefix:
15895 self.network.mac_prefix = self.mac_prefix
15897 if self.op.network6:
15898 self.network.network6 = self.network6
15900 if self.op.gateway6:
15901 self.network.gateway6 = self.gateway6
15903 if self.op.network_type:
15904 self.network.network_type = self.network_type
15906 self.pool.Validate()
15908 self.cfg.Update(self.network, feedback_fn)
15911 class _NetworkQuery(_QueryBase):
15912 FIELDS = query.NETWORK_FIELDS
15914 def ExpandNames(self, lu):
15915 lu.needed_locks = {}
15917 self._all_networks = lu.cfg.GetAllNetworksInfo()
15918 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
15921 self.wanted = [name_to_uuid[name]
15922 for name in utils.NiceSort(name_to_uuid.keys())]
15924 # Entries may be given either as names or as UUIDs.
15927 all_uuid = frozenset(self._all_networks.keys())
15929 for name in self.names:
15930 if name in all_uuid:
15931 self.wanted.append(name)
15932 elif name in name_to_uuid:
15933 self.wanted.append(name_to_uuid[name])
15935 missing.append(name)
15938 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
15939 errors.ECODE_NOENT)
15941 def DeclareLocks(self, lu, level):
15944 def _GetQueryData(self, lu):
15945 """Computes the list of networks and their attributes.
15948 do_instances = query.NETQ_INST in self.requested_data
15949 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
15950 do_stats = query.NETQ_STATS in self.requested_data
15952 network_to_groups = None
15953 network_to_instances = None
15956 # For NETQ_GROUP, we need to map network->[groups]
15958 all_groups = lu.cfg.GetAllNodeGroupsInfo()
15959 network_to_groups = dict((uuid, []) for uuid in self.wanted)
15962 all_instances = lu.cfg.GetAllInstancesInfo()
15963 all_nodes = lu.cfg.GetAllNodesInfo()
15964 network_to_instances = dict((uuid, []) for uuid in self.wanted)
15966 for group in all_groups.values():
15968 group_nodes = [node.name for node in all_nodes.values() if
15969 node.group == group.uuid]
15970 group_instances = [instance for instance in all_instances.values()
15971 if instance.primary_node in group_nodes]
15973 for net_uuid in group.networks.keys():
15974 if net_uuid in network_to_groups:
15975 netparams = group.networks[net_uuid]
15976 mode = netparams[constants.NIC_MODE]
15977 link = netparams[constants.NIC_LINK]
15978 info = "%s(%s, %s)" % (group.name, mode, link)
15979 network_to_groups[net_uuid].append(info)
15982 for instance in group_instances:
15983 for nic in instance.nics:
15984 if nic.network == self._all_networks[net_uuid].name:
15985 network_to_instances[net_uuid].append(instance.name)
15990 for uuid, net in self._all_networks.items():
15991 if uuid in self.wanted:
15992 pool = network.AddressPool(net)
15994 "free_count": pool.GetFreeCount(),
15995 "reserved_count": pool.GetReservedCount(),
15996 "map": pool.GetMap(),
15997 "external_reservations":
15998 utils.CommaJoin(pool.GetExternalReservations()),
16001 return query.NetworkQueryData([self._all_networks[uuid]
16002 for uuid in self.wanted],
16004 network_to_instances,
16008 class LUNetworkQuery(NoHooksLU):
16009 """Logical unit for querying networks.
16014 def CheckArguments(self):
16015 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16016 self.op.output_fields, False)
16018 def ExpandNames(self):
16019 self.nq.ExpandNames(self)
16021 def Exec(self, feedback_fn):
16022 return self.nq.OldStyleQuery(self)
16025 class LUNetworkConnect(LogicalUnit):
16026 """Connect a network to a nodegroup
16029 HPATH = "network-connect"
16030 HTYPE = constants.HTYPE_NETWORK
16033 def ExpandNames(self):
16034 self.network_name = self.op.network_name
16035 self.group_name = self.op.group_name
16036 self.network_mode = self.op.network_mode
16037 self.network_link = self.op.network_link
16039 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16040 self.network = self.cfg.GetNetwork(self.network_uuid)
16041 if self.network is None:
16042 raise errors.OpPrereqError("Network %s does not exist" %
16043 self.network_name, errors.ECODE_INVAL)
16045 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16046 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16047 if self.group is None:
16048 raise errors.OpPrereqError("Group %s does not exist" %
16049 self.group_name, errors.ECODE_INVAL)
16051 self.needed_locks = {
16052 locking.LEVEL_INSTANCE: [],
16053 locking.LEVEL_NODEGROUP: [self.group_uuid],
16055 self.share_locks[locking.LEVEL_INSTANCE] = 1
16057 def DeclareLocks(self, level):
16058 if level == locking.LEVEL_INSTANCE:
16059 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16061 # Lock instances optimistically, needs verification once group lock has
16063 self.needed_locks[locking.LEVEL_INSTANCE] = \
16064 self.cfg.GetNodeGroupInstances(self.group_uuid)
16066 def BuildHooksEnv(self):
16068 "GROUP_NAME": self.group_name,
16069 "GROUP_NETWORK_MODE": self.network_mode,
16070 "GROUP_NETWORK_LINK": self.network_link,
16072 ret.update(_BuildNetworkHookEnvByObject(self.network))
16075 def BuildHooksNodes(self):
16076 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16077 return (nodes, nodes)
16079 def CheckPrereq(self):
16080 l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
16084 constants.NIC_MODE: self.network_mode,
16085 constants.NIC_LINK: self.network_link,
16087 objects.NIC.CheckParameterSyntax(self.netparams)
16089 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16090 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16091 self.connected = False
16092 if self.network_uuid in self.group.networks:
16093 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16094 (self.network_name, self.group.name))
16095 self.connected = True
16098 pool = network.AddressPool(self.network)
16099 if self.op.conflicts_check:
16100 groupinstances = []
16101 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16102 groupinstances.append(self.cfg.GetInstanceInfo(n))
16103 instances = [(instance.name, idx, nic.ip)
16104 for instance in groupinstances
16105 for idx, nic in enumerate(instance.nics)
16106 if (not nic.network and pool.Contains(nic.ip))]
16108 self.LogWarning("The following occurrences use IPs from network %s"
16109 " that is about to be connected to nodegroup %s: %s" %
16110 (self.network_name, self.group.name,
16112 raise errors.OpPrereqError("Conflicting IPs found."
16113 " Please remove/modify"
16114 " corresponding NICs",
16115 errors.ECODE_INVAL)
16117 def Exec(self, feedback_fn):
16121 self.group.networks[self.network_uuid] = self.netparams
16122 self.cfg.Update(self.group, feedback_fn)
16125 class LUNetworkDisconnect(LogicalUnit):
16126 """Disconnect a network to a nodegroup
16129 HPATH = "network-disconnect"
16130 HTYPE = constants.HTYPE_NETWORK
16133 def ExpandNames(self):
16134 self.network_name = self.op.network_name
16135 self.group_name = self.op.group_name
16137 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16138 self.network = self.cfg.GetNetwork(self.network_uuid)
16139 if self.network is None:
16140 raise errors.OpPrereqError("Network %s does not exist" %
16141 self.network_name, errors.ECODE_INVAL)
16143 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16144 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16145 if self.group is None:
16146 raise errors.OpPrereqError("Group %s does not exist" %
16147 self.group_name, errors.ECODE_INVAL)
16149 self.needed_locks = {
16150 locking.LEVEL_INSTANCE: [],
16151 locking.LEVEL_NODEGROUP: [self.group_uuid],
16153 self.share_locks[locking.LEVEL_INSTANCE] = 1
16155 def DeclareLocks(self, level):
16156 if level == locking.LEVEL_INSTANCE:
16157 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16159 # Lock instances optimistically, needs verification once group lock has
16161 self.needed_locks[locking.LEVEL_INSTANCE] = \
16162 self.cfg.GetNodeGroupInstances(self.group_uuid)
16164 def BuildHooksEnv(self):
16166 "GROUP_NAME": self.group_name,
16168 ret.update(_BuildNetworkHookEnvByObject(self.network))
16171 def BuildHooksNodes(self):
16172 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16173 return (nodes, nodes)
16175 def CheckPrereq(self):
16176 l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
16179 self.connected = True
16180 if self.network_uuid not in self.group.networks:
16181 self.LogWarning("Network '%s' is"
16182 " not mapped to group '%s'" %
16183 (self.network_name, self.group.name))
16184 self.connected = False
16187 if self.op.conflicts_check:
16188 groupinstances = []
16189 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16190 groupinstances.append(self.cfg.GetInstanceInfo(n))
16191 instances = [(instance.name, idx, nic.ip)
16192 for instance in groupinstances
16193 for idx, nic in enumerate(instance.nics)
16194 if nic.network == self.network_name]
16196 self.LogWarning("The following occurrences use IPs from network %s"
16197 " that is about to be disconnected from the nodegroup"
16199 (self.network_name, self.group.name,
16201 raise errors.OpPrereqError("Conflicting IPs."
16202 " Please remove/modify"
16203 " corresponding NICS",
16204 errors.ECODE_INVAL)
16206 def Exec(self, feedback_fn):
16207 if not self.connected:
16210 del self.group.networks[self.network_uuid]
16211 self.cfg.Update(self.group, feedback_fn)
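# Hedged sketch (plain dict, invented UUID and link name): connecting a
# network to a node group records the NIC parameters under the network's UUID
# in the group's 'networks' mapping; disconnecting removes that entry again.
def _ExampleConnectDisconnect():
  group_networks = {}
  netparams = {
    constants.NIC_MODE: constants.NIC_MODE_BRIDGED,
    constants.NIC_LINK: "br0",
    }
  net_uuid = "11111111-2222-3333-4444-555555555555"
  group_networks[net_uuid] = netparams  # connect
  del group_networks[net_uuid]          # disconnect
  return group_networks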
16214 #: Query type implementations
16216 constants.QR_CLUSTER: _ClusterQuery,
16217 constants.QR_INSTANCE: _InstanceQuery,
16218 constants.QR_NODE: _NodeQuery,
16219 constants.QR_GROUP: _GroupQuery,
16220 constants.QR_NETWORK: _NetworkQuery,
16221 constants.QR_OS: _OsQuery,
16222 constants.QR_EXPORT: _ExportQuery,
16225 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16228 def _GetQueryImplementation(name):
16229 """Returns the implemtnation for a query type.
16231 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16235 return _QUERY_IMPL[name]
16237 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16238 errors.ECODE_INVAL)
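# Hedged usage sketch (assumes an already constructed LU instance "lu"):
#   impl = _GetQueryImplementation(constants.QR_GROUP)
#   qobj = impl(qlang.MakeSimpleFilter("name", ["group1"]), ["name"], False)
#   data = qobj.OldStyleQuery(lu)
# This mirrors how LUGroupQuery above wires _GroupQuery to the opcode.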
16241 def _CheckForConflictingIp(lu, ip, node):
16242 """In case of conflicting ip raise error.
16245 @param ip: ip address
16247 @param node: node name
16250 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16251 if conf_net is not None:
16252 raise errors.OpPrereqError("Conflicting IP found:"
16253 " %s <> %s." % (ip, conf_net),
16254 errors.ECODE_INVAL)
16256 return (None, None)