4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance states in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
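# An LU's Exec method can queue follow-up work by returning such an object;
# a minimal sketch (not from this module, assuming OpTestDelay is a suitable
# throw-away opcode):
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1)],   # first job, one opcode
#             [opcodes.OpTestDelay(duration=2)]]   # second job
#     return ResultWithJobs(jobs, done="initial work finished")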
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing it separately is better because:
176 - ExpandNames is left as a purely lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods need not worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level, omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. If there are no such nodes, an
318 empty list should be returned (not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged, but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the unused-argument and
345 # could-be-a-function warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to lock only some instances' nodes, or
381 to lock only primary or secondary nodes, if needed.
383 It should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
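# A minimal sketch (not part of this class) of how an instance-level LU
# typically combines the two helpers above, assuming it only needs the
# instance lock and the locks of that instance's nodes:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()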
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU;
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
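# Query-style LUs typically pair this helper with ALL_SET, as
# LUClusterVerifyConfig does further down; a minimal sketch:
#
#   self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
#   self.share_locks = _ShareAll()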
588 def _AnnotateDiskParams(instance, devs, cfg):
589 """Little helper wrapper to the rpc annotation method.
591 @param instance: The instance object
592 @type devs: List of L{objects.Disk}
593 @param devs: The root devices (not any of its children!)
594 @param cfg: The config object
595 @return: The annotated disk copies
596 @see: L{rpc.AnnotateDiskParams}
599 return rpc.AnnotateDiskParams(instance.disk_template, devs,
600 cfg.GetInstanceDiskParams(instance))
603 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
605 """Checks if node groups for locked instances are still correct.
607 @type cfg: L{config.ConfigWriter}
608 @param cfg: Cluster configuration
609 @type instances: dict; string as key, L{objects.Instance} as value
610 @param instances: Dictionary, instance name as key, instance object as value
611 @type owned_groups: iterable of string
612 @param owned_groups: List of owned groups
613 @type owned_nodes: iterable of string
614 @param owned_nodes: List of owned nodes
615 @type cur_group_uuid: string or None
616 @param cur_group_uuid: Optional group UUID to check against instance's groups
619 for (name, inst) in instances.items():
620 assert owned_nodes.issuperset(inst.all_nodes), \
621 "Instance %s's nodes changed while we kept the lock" % name
623 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
625 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
626 "Instance %s has no node in group %s" % (name, cur_group_uuid)
629 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
631 """Checks if the owned node groups are still correct for an instance.
633 @type cfg: L{config.ConfigWriter}
634 @param cfg: The cluster configuration
635 @type instance_name: string
636 @param instance_name: Instance name
637 @type owned_groups: set or frozenset
638 @param owned_groups: List of currently owned node groups
639 @type primary_only: boolean
640 @param primary_only: Whether to check node groups for only the primary node
643 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
645 if not owned_groups.issuperset(inst_groups):
646 raise errors.OpPrereqError("Instance %s's node groups changed since"
647 " locks were acquired, current groups are"
648 " are '%s', owning groups '%s'; retry the"
651 utils.CommaJoin(inst_groups),
652 utils.CommaJoin(owned_groups)),
658 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
659 """Checks if the instances in a node group are still correct.
661 @type cfg: L{config.ConfigWriter}
662 @param cfg: The cluster configuration
663 @type group_uuid: string
664 @param group_uuid: Node group UUID
665 @type owned_instances: set or frozenset
666 @param owned_instances: List of currently owned instances
669 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
670 if owned_instances != wanted_instances:
671 raise errors.OpPrereqError("Instances in node group '%s' changed since"
672 " locks were acquired, wanted '%s', have '%s';"
673 " retry the operation" %
675 utils.CommaJoin(wanted_instances),
676 utils.CommaJoin(owned_instances)),
679 return wanted_instances
682 def _SupportsOob(cfg, node):
683 """Tells if node supports OOB.
685 @type cfg: L{config.ConfigWriter}
686 @param cfg: The cluster configuration
687 @type node: L{objects.Node}
688 @param node: The node
689 @return: The OOB script if supported or an empty string otherwise
692 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
695 def _CopyLockList(names):
696 """Makes a copy of a list of lock names.
698 Handles L{locking.ALL_SET} correctly.
701 if names == locking.ALL_SET:
702 return locking.ALL_SET
707 def _GetWantedNodes(lu, nodes):
708 """Returns list of checked and expanded node names.
710 @type lu: L{LogicalUnit}
711 @param lu: the logical unit on whose behalf we execute
713 @param nodes: list of node names or None for all nodes
715 @return: the list of nodes, sorted
716 @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
720 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
722 return utils.NiceSort(lu.cfg.GetNodeList())
725 def _GetWantedInstances(lu, instances):
726 """Returns list of checked and expanded instance names.
728 @type lu: L{LogicalUnit}
729 @param lu: the logical unit on whose behalf we execute
730 @type instances: list
731 @param instances: list of instance names or None for all instances
733 @return: the list of instances, sorted
734 @raise errors.OpPrereqError: if the instances parameter is of the wrong type
735 @raise errors.OpPrereqError: if any of the passed instances is not found
739 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
741 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
745 def _GetUpdatedParams(old_params, update_dict,
746 use_default=True, use_none=False):
747 """Return the new version of a parameter dictionary.
749 @type old_params: dict
750 @param old_params: old parameters
751 @type update_dict: dict
752 @param update_dict: dict containing new parameter values, or
753 constants.VALUE_DEFAULT to reset the parameter to its default
755 @type use_default: boolean
756 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
757 values as 'to be deleted' values
758 @type use_none: boolean
759 @param use_none: whether to recognise C{None} values as 'to be
762 @return: the new parameter dictionary
765 params_copy = copy.deepcopy(old_params)
766 for key, val in update_dict.iteritems():
767 if ((use_default and val == constants.VALUE_DEFAULT) or
768 (use_none and val is None)):
774 params_copy[key] = val
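# A minimal usage sketch (not part of the module), assuming use_default is
# left at its default of True:
#
#   old = {constants.BE_MAXMEM: 1024, constants.BE_VCPUS: 2}
#   new = _GetUpdatedParams(old, {constants.BE_MAXMEM: 2048,
#                                 constants.BE_VCPUS: constants.VALUE_DEFAULT})
#   # new == {constants.BE_MAXMEM: 2048}; BE_VCPUS is dropped, so the
#   # cluster-level default applies again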
778 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
779 """Return the new version of a instance policy.
781 @param group_policy: whether this policy applies to a group and thus
782 we should support removal of policy entries
785 use_none = use_default = group_policy
786 ipolicy = copy.deepcopy(old_ipolicy)
787 for key, value in new_ipolicy.items():
788 if key not in constants.IPOLICY_ALL_KEYS:
789 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
791 if key in constants.IPOLICY_ISPECS:
792 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
793 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
795 use_default=use_default)
797 if (not value or value == [constants.VALUE_DEFAULT] or
798 value == constants.VALUE_DEFAULT):
802 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
803 " on the cluster'" % key,
806 if key in constants.IPOLICY_PARAMETERS:
807 # FIXME: we assume all such values are float
809 ipolicy[key] = float(value)
810 except (TypeError, ValueError), err:
811 raise errors.OpPrereqError("Invalid value for attribute"
812 " '%s': '%s', error: %s" %
813 (key, value, err), errors.ECODE_INVAL)
815 # FIXME: we assume all others are lists; this should be redone
817 ipolicy[key] = list(value)
819 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
820 except errors.ConfigurationError, err:
821 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
826 def _UpdateAndVerifySubDict(base, updates, type_check):
827 """Updates and verifies a dict with sub dicts of the same type.
829 @param base: The dict with the old data
830 @param updates: The dict with the new data
831 @param type_check: Dict suitable to ForceDictType to verify correct types
832 @returns: A new dict with updated and verified values
836 new = _GetUpdatedParams(old, value)
837 utils.ForceDictType(new, type_check)
840 ret = copy.deepcopy(base)
841 ret.update(dict((key, fn(base.get(key, {}), value))
842 for key, value in updates.items()))
846 def _MergeAndVerifyHvState(op_input, obj_input):
847 """Combines the hv state from an opcode with the one of the object
849 @param op_input: The input dict from the opcode
850 @param obj_input: The input dict from the objects
851 @return: The verified and updated dict
855 invalid_hvs = set(op_input) - constants.HYPER_TYPES
857 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
858 " %s" % utils.CommaJoin(invalid_hvs),
860 if obj_input is None:
862 type_check = constants.HVSTS_PARAMETER_TYPES
863 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
868 def _MergeAndVerifyDiskState(op_input, obj_input):
869 """Combines the disk state from an opcode with the one of the object
871 @param op_input: The input dict from the opcode
872 @param obj_input: The input dict from the objects
873 @return: The verified and updated dict
876 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
878 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
879 utils.CommaJoin(invalid_dst),
881 type_check = constants.DSS_PARAMETER_TYPES
882 if obj_input is None:
884 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
886 for key, value in op_input.items())
891 def _ReleaseLocks(lu, level, names=None, keep=None):
892 """Releases locks owned by an LU.
894 @type lu: L{LogicalUnit}
895 @param level: Lock level
896 @type names: list or None
897 @param names: Names of locks to release
898 @type keep: list or None
899 @param keep: Names of locks to retain
902 assert not (keep is not None and names is not None), \
903 "Only one of the 'names' and the 'keep' parameters can be given"
905 if names is not None:
906 should_release = names.__contains__
908 should_release = lambda name: name not in keep
910 should_release = None
912 owned = lu.owned_locks(level)
914 # Not owning any lock at this level, do nothing
921 # Determine which locks to release
923 if should_release(name):
928 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
930 # Release just some locks
931 lu.glm.release(level, names=release)
933 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
936 lu.glm.release(level)
938 assert not lu.glm.is_owned(level), "No locks should be owned"
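# A minimal usage sketch (not part of the module): once an LU has narrowed
# its work down to a single node it can drop the other node locks it holds
# (assuming the opcode exposes a node_name slot):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#
# Passing neither names nor keep releases every lock owned at that level.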
941 def _MapInstanceDisksToNodes(instances):
942 """Creates a map from (node, volume) to instance name.
944 @type instances: list of L{objects.Instance}
945 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
948 return dict(((node, vol), inst.name)
949 for inst in instances
950 for (node, vols) in inst.MapLVsByNode().items()
954 def _RunPostHook(lu, node_name):
955 """Runs the post-hook for an opcode on a single node.
958 hm = lu.proc.BuildHooksManager(lu)
960 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
961 except Exception, err: # pylint: disable=W0703
962 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
965 def _CheckOutputFields(static, dynamic, selected):
966 """Checks whether all selected fields are valid.
968 @type static: L{utils.FieldSet}
969 @param static: static fields set
970 @type dynamic: L{utils.FieldSet}
971 @param dynamic: dynamic fields set
978 delta = f.NonMatching(selected)
980 raise errors.OpPrereqError("Unknown output fields selected: %s"
981 % ",".join(delta), errors.ECODE_INVAL)
984 def _CheckGlobalHvParams(params):
985 """Validates that given hypervisor params are not global ones.
987 This will ensure that instances don't get customised versions of
991 used_globals = constants.HVC_GLOBALS.intersection(params)
993 msg = ("The following hypervisor parameters are global and cannot"
994 " be customized at instance level, please modify them at"
995 " cluster level: %s" % utils.CommaJoin(used_globals))
996 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
999 def _CheckNodeOnline(lu, node, msg=None):
1000 """Ensure that a given node is online.
1002 @param lu: the LU on behalf of which we make the check
1003 @param node: the node to check
1004 @param msg: if passed, should be a message to replace the default one
1005 @raise errors.OpPrereqError: if the node is offline
1009 msg = "Can't use offline node"
1010 if lu.cfg.GetNodeInfo(node).offline:
1011 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1014 def _CheckNodeNotDrained(lu, node):
1015 """Ensure that a given node is not drained.
1017 @param lu: the LU on behalf of which we make the check
1018 @param node: the node to check
1019 @raise errors.OpPrereqError: if the node is drained
1022 if lu.cfg.GetNodeInfo(node).drained:
1023 raise errors.OpPrereqError("Can't use drained node %s" % node,
1027 def _CheckNodeVmCapable(lu, node):
1028 """Ensure that a given node is vm capable.
1030 @param lu: the LU on behalf of which we make the check
1031 @param node: the node to check
1032 @raise errors.OpPrereqError: if the node is not vm capable
1035 if not lu.cfg.GetNodeInfo(node).vm_capable:
1036 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1040 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1041 """Ensure that a node supports a given OS.
1043 @param lu: the LU on behalf of which we make the check
1044 @param node: the node to check
1045 @param os_name: the OS to query about
1046 @param force_variant: whether to ignore variant errors
1047 @raise errors.OpPrereqError: if the node does not support the OS
1050 result = lu.rpc.call_os_get(node, os_name)
1051 result.Raise("OS '%s' not in supported OS list for node %s" %
1053 prereq=True, ecode=errors.ECODE_INVAL)
1054 if not force_variant:
1055 _CheckOSVariant(result.payload, os_name)
1058 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1059 """Ensure that a node has the given secondary ip.
1061 @type lu: L{LogicalUnit}
1062 @param lu: the LU on behalf of which we make the check
1064 @param node: the node to check
1065 @type secondary_ip: string
1066 @param secondary_ip: the ip to check
1067 @type prereq: boolean
1068 @param prereq: whether to throw a prerequisite or an execute error
1069 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1070 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1073 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1074 result.Raise("Failure checking secondary ip on node %s" % node,
1075 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1076 if not result.payload:
1077 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1078 " please fix and re-run this command" % secondary_ip)
1080 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1082 raise errors.OpExecError(msg)
1085 def _GetClusterDomainSecret():
1086 """Reads the cluster domain secret.
1089 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1093 def _CheckInstanceState(lu, instance, req_states, msg=None):
1094 """Ensure that an instance is in one of the required states.
1096 @param lu: the LU on behalf of which we make the check
1097 @param instance: the instance to check
1098 @param msg: if passed, should be a message to replace the default one
1099 @raise errors.OpPrereqError: if the instance is not in the required state
1103 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1104 if instance.admin_state not in req_states:
1105 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1106 (instance.name, instance.admin_state, msg),
1109 if constants.ADMINST_UP not in req_states:
1110 pnode = instance.primary_node
1111 if not lu.cfg.GetNodeInfo(pnode).offline:
1112 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1113 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1114 prereq=True, ecode=errors.ECODE_ENVIRON)
1115 if instance.name in ins_l.payload:
1116 raise errors.OpPrereqError("Instance %s is running, %s" %
1117 (instance.name, msg), errors.ECODE_STATE)
1119 lu.LogWarning("Primary node offline, ignoring check that instance"
1123 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1124 """Computes if value is in the desired range.
1126 @param name: name of the parameter for which we perform the check
1127 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1129 @param ipolicy: dictionary containing min, max and std values
1130 @param value: actual value that we want to use
1131 @return: None, or a message describing the criterion that is not met
1135 if value in [None, constants.VALUE_AUTO]:
1137 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1138 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1139 if value > max_v or min_v > value:
1141 fqn = "%s/%s" % (name, qualifier)
1144 return ("%s value %s is not in range [%s, %s]" %
1145 (fqn, value, min_v, max_v))
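# A worked sketch (not part of the module): with a policy whose memory-size
# bounds are min 128 and max 4096, a value of 8192 yields a violation string
# roughly like "memory-size value 8192 is not in range [128, 4096]", while
# a value of 1024 yields None.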
1149 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1150 nic_count, disk_sizes, spindle_use,
1151 _compute_fn=_ComputeMinMaxSpec):
1152 """Verifies ipolicy against provided specs.
1155 @param ipolicy: The ipolicy
1157 @param mem_size: The memory size
1158 @type cpu_count: int
1159 @param cpu_count: Used cpu cores
1160 @type disk_count: int
1161 @param disk_count: Number of disks used
1162 @type nic_count: int
1163 @param nic_count: Number of nics used
1164 @type disk_sizes: list of ints
1165 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1166 @type spindle_use: int
1167 @param spindle_use: The number of spindles this instance uses
1168 @param _compute_fn: The compute function (unittest only)
1169 @return: A list of violations, or an empty list if no violations are found
1172 assert disk_count == len(disk_sizes)
1175 (constants.ISPEC_MEM_SIZE, "", mem_size),
1176 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1177 (constants.ISPEC_DISK_COUNT, "", disk_count),
1178 (constants.ISPEC_NIC_COUNT, "", nic_count),
1179 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1180 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1181 for idx, d in enumerate(disk_sizes)]
1184 (_compute_fn(name, qualifier, ipolicy, value)
1185 for (name, qualifier, value) in test_settings))
1188 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1189 _compute_fn=_ComputeIPolicySpecViolation):
1190 """Compute if instance meets the specs of ipolicy.
1193 @param ipolicy: The ipolicy to verify against
1194 @type instance: L{objects.Instance}
1195 @param instance: The instance to verify
1196 @param _compute_fn: The function to verify ipolicy (unittest only)
1197 @see: L{_ComputeIPolicySpecViolation}
1200 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1201 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1202 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1203 disk_count = len(instance.disks)
1204 disk_sizes = [disk.size for disk in instance.disks]
1205 nic_count = len(instance.nics)
1207 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1208 disk_sizes, spindle_use)
1211 def _ComputeIPolicyInstanceSpecViolation(
1212 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1213 """Compute if instance specs meets the specs of ipolicy.
1216 @param ipolicy: The ipolicy to verify against
1217 @type instance_spec: dict
1218 @param instance_spec: The instance spec to verify
1219 @param _compute_fn: The function to verify ipolicy (unittest only)
1220 @see: L{_ComputeIPolicySpecViolation}
1223 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1224 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1225 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1226 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1227 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1228 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1230 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1231 disk_sizes, spindle_use)
1234 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1236 _compute_fn=_ComputeIPolicyInstanceViolation):
1237 """Compute if instance meets the specs of the new target group.
1239 @param ipolicy: The ipolicy to verify
1240 @param instance: The instance object to verify
1241 @param current_group: The current group of the instance
1242 @param target_group: The new group of the instance
1243 @param _compute_fn: The function to verify ipolicy (unittest only)
1244 @see: L{_ComputeIPolicySpecViolation}
1247 if current_group == target_group:
1250 return _compute_fn(ipolicy, instance)
1253 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1254 _compute_fn=_ComputeIPolicyNodeViolation):
1255 """Checks that the target node is correct in terms of instance policy.
1257 @param ipolicy: The ipolicy to verify
1258 @param instance: The instance object to verify
1259 @param node: The new node the instance is to be relocated to
1260 @param ignore: Ignore violations of the ipolicy
1261 @param _compute_fn: The function to verify ipolicy (unittest only)
1262 @see: L{_ComputeIPolicySpecViolation}
1265 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1266 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1269 msg = ("Instance does not meet target node group's (%s) instance"
1270 " policy: %s") % (node.group, utils.CommaJoin(res))
1274 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1277 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1278 """Computes a set of any instances that would violate the new ipolicy.
1280 @param old_ipolicy: The current (still in-place) ipolicy
1281 @param new_ipolicy: The new (to become) ipolicy
1282 @param instances: List of instances to verify
1283 @return: A list of instances which violate the new ipolicy but
1287 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1288 _ComputeViolatingInstances(old_ipolicy, instances))
1291 def _ExpandItemName(fn, name, kind):
1292 """Expand an item name.
1294 @param fn: the function to use for expansion
1295 @param name: requested item name
1296 @param kind: text description ('Node' or 'Instance')
1297 @return: the resolved (full) name
1298 @raise errors.OpPrereqError: if the item is not found
1301 full_name = fn(name)
1302 if full_name is None:
1303 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1308 def _ExpandNodeName(cfg, name):
1309 """Wrapper over L{_ExpandItemName} for nodes."""
1310 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1313 def _ExpandInstanceName(cfg, name):
1314 """Wrapper over L{_ExpandItemName} for instance."""
1315 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1318 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1319 network_type, mac_prefix, tags):
1320 """Builds network related env variables for hooks
1322 This builds the hook environment from individual variables.
1325 @param name: the name of the network
1326 @type subnet: string
1327 @param subnet: the ipv4 subnet
1328 @type gateway: string
1329 @param gateway: the ipv4 gateway
1330 @type network6: string
1331 @param network6: the ipv6 subnet
1332 @type gateway6: string
1333 @param gateway6: the ipv6 gateway
1334 @type network_type: string
1335 @param network_type: the type of the network
1336 @type mac_prefix: string
1337 @param mac_prefix: the mac_prefix
1339 @param tags: the tags of the network
1344 env["NETWORK_NAME"] = name
1346 env["NETWORK_SUBNET"] = subnet
1348 env["NETWORK_GATEWAY"] = gateway
1350 env["NETWORK_SUBNET6"] = network6
1352 env["NETWORK_GATEWAY6"] = gateway6
1354 env["NETWORK_MAC_PREFIX"] = mac_prefix
1356 env["NETWORK_TYPE"] = network_type
1358 env["NETWORK_TAGS"] = " ".join(tags)
1363 def _BuildNetworkHookEnvByObject(net):
1364 """Builds network related env varliables for hooks
1366 @type net: L{objects.Network}
1367 @param net: the network object
1372 "subnet": net.network,
1373 "gateway": net.gateway,
1374 "network6": net.network6,
1375 "gateway6": net.gateway6,
1376 "network_type": net.network_type,
1377 "mac_prefix": net.mac_prefix,
1381 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
1384 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1385 minmem, maxmem, vcpus, nics, disk_template, disks,
1386 bep, hvp, hypervisor_name, tags):
1387 """Builds instance related env variables for hooks
1389 This builds the hook environment from individual variables.
1392 @param name: the name of the instance
1393 @type primary_node: string
1394 @param primary_node: the name of the instance's primary node
1395 @type secondary_nodes: list
1396 @param secondary_nodes: list of secondary nodes as strings
1397 @type os_type: string
1398 @param os_type: the name of the instance's OS
1399 @type status: string
1400 @param status: the desired status of the instance
1401 @type minmem: string
1402 @param minmem: the minimum memory size of the instance
1403 @type maxmem: string
1404 @param maxmem: the maximum memory size of the instance
1406 @param vcpus: the count of VCPUs the instance has
1408 @param nics: list of tuples (ip, mac, mode, link, network) representing
1409 the NICs the instance has
1410 @type disk_template: string
1411 @param disk_template: the disk template of the instance
1413 @param disks: the list of (size, mode) pairs
1415 @param bep: the backend parameters for the instance
1417 @param hvp: the hypervisor parameters for the instance
1418 @type hypervisor_name: string
1419 @param hypervisor_name: the hypervisor for the instance
1421 @param tags: list of instance tags as strings
1423 @return: the hook environment for this instance
1428 "INSTANCE_NAME": name,
1429 "INSTANCE_PRIMARY": primary_node,
1430 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1431 "INSTANCE_OS_TYPE": os_type,
1432 "INSTANCE_STATUS": status,
1433 "INSTANCE_MINMEM": minmem,
1434 "INSTANCE_MAXMEM": maxmem,
1435 # TODO(2.7) remove deprecated "memory" value
1436 "INSTANCE_MEMORY": maxmem,
1437 "INSTANCE_VCPUS": vcpus,
1438 "INSTANCE_DISK_TEMPLATE": disk_template,
1439 "INSTANCE_HYPERVISOR": hypervisor_name,
1442 nic_count = len(nics)
1443 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1446 env["INSTANCE_NIC%d_IP" % idx] = ip
1447 env["INSTANCE_NIC%d_MAC" % idx] = mac
1448 env["INSTANCE_NIC%d_MODE" % idx] = mode
1449 env["INSTANCE_NIC%d_LINK" % idx] = link
1451 env["INSTANCE_NIC%d_NETWORK" % idx] = net
1453 nobj = objects.Network.FromDict(netinfo)
1455 env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
1457 env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
1459 env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
1461 env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
1463 env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
1464 if nobj.network_type:
1465 env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
1467 env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
1468 if mode == constants.NIC_MODE_BRIDGED:
1469 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1473 env["INSTANCE_NIC_COUNT"] = nic_count
1476 disk_count = len(disks)
1477 for idx, (size, mode) in enumerate(disks):
1478 env["INSTANCE_DISK%d_SIZE" % idx] = size
1479 env["INSTANCE_DISK%d_MODE" % idx] = mode
1483 env["INSTANCE_DISK_COUNT"] = disk_count
1488 env["INSTANCE_TAGS"] = " ".join(tags)
1490 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1491 for key, value in source.items():
1492 env["INSTANCE_%s_%s" % (kind, key)] = value
1497 def _NICToTuple(lu, nic):
1498 """Build a tupple of nic information.
1500 @type lu: L{LogicalUnit}
1501 @param lu: the logical unit on whose behalf we execute
1502 @type nic: L{objects.NIC}
1503 @param nic: nic to convert to hooks tuple
1508 cluster = lu.cfg.GetClusterInfo()
1509 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1510 mode = filled_params[constants.NIC_MODE]
1511 link = filled_params[constants.NIC_LINK]
1515 net_uuid = lu.cfg.LookupNetwork(net)
1517 nobj = lu.cfg.GetNetwork(net_uuid)
1518 netinfo = objects.Network.ToDict(nobj)
1519 return (ip, mac, mode, link, net, netinfo)
1522 def _NICListToTuple(lu, nics):
1523 """Build a list of nic information tuples.
1525 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1526 value in LUInstanceQueryData.
1528 @type lu: L{LogicalUnit}
1529 @param lu: the logical unit on whose behalf we execute
1530 @type nics: list of L{objects.NIC}
1531 @param nics: list of nics to convert to hooks tuples
1536 hooks_nics.append(_NICToTuple(lu, nic))
1540 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1541 """Builds instance related env variables for hooks from an object.
1543 @type lu: L{LogicalUnit}
1544 @param lu: the logical unit on whose behalf we execute
1545 @type instance: L{objects.Instance}
1546 @param instance: the instance for which we should build the
1548 @type override: dict
1549 @param override: dictionary with key/values that will override
1552 @return: the hook environment dictionary
1555 cluster = lu.cfg.GetClusterInfo()
1556 bep = cluster.FillBE(instance)
1557 hvp = cluster.FillHV(instance)
1559 "name": instance.name,
1560 "primary_node": instance.primary_node,
1561 "secondary_nodes": instance.secondary_nodes,
1562 "os_type": instance.os,
1563 "status": instance.admin_state,
1564 "maxmem": bep[constants.BE_MAXMEM],
1565 "minmem": bep[constants.BE_MINMEM],
1566 "vcpus": bep[constants.BE_VCPUS],
1567 "nics": _NICListToTuple(lu, instance.nics),
1568 "disk_template": instance.disk_template,
1569 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1572 "hypervisor_name": instance.hypervisor,
1573 "tags": instance.tags,
1576 args.update(override)
1577 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1580 def _AdjustCandidatePool(lu, exceptions):
1581 """Adjust the candidate pool after node operations.
1584 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1586 lu.LogInfo("Promoted nodes to master candidate role: %s",
1587 utils.CommaJoin(node.name for node in mod_list))
1588 for name in mod_list:
1589 lu.context.ReaddNode(name)
1590 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1592 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1596 def _DecideSelfPromotion(lu, exceptions=None):
1597 """Decide whether I should promote myself as a master candidate.
1600 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1601 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1602 # the new node will increase mc_max by one, so:
1603 mc_should = min(mc_should + 1, cp_size)
1604 return mc_now < mc_should
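# Worked example: with candidate_pool_size = 10, 4 current candidates and a
# current target of 5, adding this node gives mc_should = min(5 + 1, 10) = 6,
# so 4 < 6 and the node promotes itself.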
1607 def _ComputeViolatingInstances(ipolicy, instances):
1608 """Computes a set of instances who violates given ipolicy.
1610 @param ipolicy: The ipolicy to verify
1611 @type instances: iterable of L{objects.Instance}
1612 @param instances: List of instances to verify
1613 @return: A frozenset of instance names violating the ipolicy
1616 return frozenset([inst.name for inst in instances
1617 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1620 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1621 """Check that the brigdes needed by a list of nics exist.
1624 cluster = lu.cfg.GetClusterInfo()
1625 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1626 brlist = [params[constants.NIC_LINK] for params in paramslist
1627 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1629 result = lu.rpc.call_bridges_exist(target_node, brlist)
1630 result.Raise("Error checking bridges on destination node '%s'" %
1631 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1634 def _CheckInstanceBridgesExist(lu, instance, node=None):
1635 """Check that the brigdes needed by an instance exist.
1639 node = instance.primary_node
1640 _CheckNicsBridgesExist(lu, instance.nics, node)
1643 def _CheckOSVariant(os_obj, name):
1644 """Check whether an OS name conforms to the os variants specification.
1646 @type os_obj: L{objects.OS}
1647 @param os_obj: OS object to check
1649 @param name: OS name passed by the user, to check for validity
1652 variant = objects.OS.GetVariant(name)
1653 if not os_obj.supported_variants:
1655 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1656 " passed)" % (os_obj.name, variant),
1660 raise errors.OpPrereqError("OS name must include a variant",
1663 if variant not in os_obj.supported_variants:
1664 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1667 def _GetNodeInstancesInner(cfg, fn):
1668 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1671 def _GetNodeInstances(cfg, node_name):
1672 """Returns a list of all primary and secondary instances on a node.
1676 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1679 def _GetNodePrimaryInstances(cfg, node_name):
1680 """Returns primary instances on a node.
1683 return _GetNodeInstancesInner(cfg,
1684 lambda inst: node_name == inst.primary_node)
1687 def _GetNodeSecondaryInstances(cfg, node_name):
1688 """Returns secondary instances on a node.
1691 return _GetNodeInstancesInner(cfg,
1692 lambda inst: node_name in inst.secondary_nodes)
1695 def _GetStorageTypeArgs(cfg, storage_type):
1696 """Returns the arguments for a storage type.
1699 # Special case for file storage
1700 if storage_type == constants.ST_FILE:
1701 # storage.FileStorage wants a list of storage directories
1702 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1707 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1710 for dev in instance.disks:
1711 cfg.SetDiskID(dev, node_name)
1713 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1715 result.Raise("Failed to get disk status from node %s" % node_name,
1716 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1718 for idx, bdev_status in enumerate(result.payload):
1719 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1725 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1726 """Check the sanity of iallocator and node arguments and use the
1727 cluster-wide iallocator if appropriate.
1729 Check that at most one of (iallocator, node) is specified. If none is
1730 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1731 then the LU's opcode's iallocator slot is filled with the cluster-wide
1734 @type iallocator_slot: string
1735 @param iallocator_slot: the name of the opcode iallocator slot
1736 @type node_slot: string
1737 @param node_slot: the name of the opcode target node slot
1740 node = getattr(lu.op, node_slot, None)
1741 ialloc = getattr(lu.op, iallocator_slot, None)
1745 if node is not None and ialloc is not None:
1746 raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1748 elif ((node is None and ialloc is None) or
1749 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1750 default_iallocator = lu.cfg.GetDefaultIAllocator()
1751 if default_iallocator:
1752 setattr(lu.op, iallocator_slot, default_iallocator)
1754 raise errors.OpPrereqError("No iallocator or node given and no"
1755 " cluster-wide default iallocator found;"
1756 " please specify either an iallocator or a"
1757 " node, or set a cluster-wide default"
1758 " iallocator", errors.ECODE_INVAL)
1761 def _GetDefaultIAllocator(cfg, ialloc):
1762 """Decides on which iallocator to use.
1764 @type cfg: L{config.ConfigWriter}
1765 @param cfg: Cluster configuration object
1766 @type ialloc: string or None
1767 @param ialloc: Iallocator specified in opcode
1769 @return: Iallocator name
1773 # Use default iallocator
1774 ialloc = cfg.GetDefaultIAllocator()
1777 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1778 " opcode nor as a cluster-wide default",
1784 def _CheckHostnameSane(lu, name):
1785 """Ensures that a given hostname resolves to a 'sane' name.
1787 The given name is required to be a prefix of the resolved hostname,
1788 to prevent accidental mismatches.
1790 @param lu: the logical unit on behalf of which we're checking
1791 @param name: the name we should resolve and check
1792 @return: the resolved hostname object
1795 hostname = netutils.GetHostname(name=name)
1796 if hostname.name != name:
1797 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1798 if not utils.MatchNameComponent(name, [hostname.name]):
1799 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1800 " same as given hostname '%s'") %
1801 (hostname.name, name), errors.ECODE_INVAL)
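# Illustrative behaviour (not part of the module): a request for "inst1"
# that resolves to "inst1.example.com" passes the prefix check above, while
# one that resolves to "other.example.com" raises OpPrereqError.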
1805 class LUClusterPostInit(LogicalUnit):
1806 """Logical unit for running hooks after cluster initialization.
1809 HPATH = "cluster-init"
1810 HTYPE = constants.HTYPE_CLUSTER
1812 def BuildHooksEnv(self):
1817 "OP_TARGET": self.cfg.GetClusterName(),
1820 def BuildHooksNodes(self):
1821 """Build hooks nodes.
1824 return ([], [self.cfg.GetMasterNode()])
1826 def Exec(self, feedback_fn):
1833 class LUClusterDestroy(LogicalUnit):
1834 """Logical unit for destroying the cluster.
1837 HPATH = "cluster-destroy"
1838 HTYPE = constants.HTYPE_CLUSTER
1840 def BuildHooksEnv(self):
1845 "OP_TARGET": self.cfg.GetClusterName(),
1848 def BuildHooksNodes(self):
1849 """Build hooks nodes.
1854 def CheckPrereq(self):
1855 """Check prerequisites.
1857 This checks whether the cluster is empty.
1859 Any errors are signaled by raising errors.OpPrereqError.
1862 master = self.cfg.GetMasterNode()
1864 nodelist = self.cfg.GetNodeList()
1865 if len(nodelist) != 1 or nodelist[0] != master:
1866 raise errors.OpPrereqError("There are still %d node(s) in"
1867 " this cluster." % (len(nodelist) - 1),
1869 instancelist = self.cfg.GetInstanceList()
1871 raise errors.OpPrereqError("There are still %d instance(s) in"
1872 " this cluster." % len(instancelist),
1875 def Exec(self, feedback_fn):
1876 """Destroys the cluster.
1879 master_params = self.cfg.GetMasterNetworkParameters()
1881 # Run post hooks on master node before it's removed
1882 _RunPostHook(self, master_params.name)
1884 ems = self.cfg.GetUseExternalMipScript()
1885 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1888 self.LogWarning("Error disabling the master IP address: %s",
1891 return master_params.name
1894 def _VerifyCertificate(filename):
1895 """Verifies a certificate for L{LUClusterVerifyConfig}.
1897 @type filename: string
1898 @param filename: Path to PEM file
1902 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1903 utils.ReadFile(filename))
1904 except Exception, err: # pylint: disable=W0703
1905 return (LUClusterVerifyConfig.ETYPE_ERROR,
1906 "Failed to load X509 certificate %s: %s" % (filename, err))
1909 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1910 constants.SSL_CERT_EXPIRATION_ERROR)
1913 fnamemsg = "While verifying %s: %s" % (filename, msg)
1918 return (None, fnamemsg)
1919 elif errcode == utils.CERT_WARNING:
1920 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1921 elif errcode == utils.CERT_ERROR:
1922 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1924 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1927 def _GetAllHypervisorParameters(cluster, instances):
1928 """Compute the set of all hypervisor parameters.
1930 @type cluster: L{objects.Cluster}
1931 @param cluster: the cluster object
1932 @type instances: list of L{objects.Instance}
1933 @param instances: additional instances from which to obtain parameters
1934 @rtype: list of (origin, hypervisor, parameters)
1935 @return: a list with all parameters found, indicating the hypervisor they
1936 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1941 for hv_name in cluster.enabled_hypervisors:
1942 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1944 for os_name, os_hvp in cluster.os_hvp.items():
1945 for hv_name, hv_params in os_hvp.items():
1947 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1948 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1950 # TODO: collapse identical parameter values into a single one
1951 for instance in instances:
1952 if instance.hvparams:
1953 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1954 cluster.FillHV(instance)))
1959 class _VerifyErrors(object):
1960 """Mix-in for cluster/group verify LUs.
1962 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1963 self.op and self._feedback_fn to be available.)
1967 ETYPE_FIELD = "code"
1968 ETYPE_ERROR = "ERROR"
1969 ETYPE_WARNING = "WARNING"
1971 def _Error(self, ecode, item, msg, *args, **kwargs):
1972 """Format an error message.
1974 Based on the opcode's error_codes parameter, either format a
1975 parseable error code, or a simpler error string.
1977 This must be called only from Exec and functions called from Exec.
1980 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1981 itype, etxt, _ = ecode
1982 # first complete the msg
1985 # then format the whole message
1986 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1987 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1993 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1994 # and finally report it via the feedback_fn
1995 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
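# Illustrative sketch (output shapes inferred from the formatting code above,
# node name hypothetical): with self.op.error_codes set the message is
# machine-parseable, otherwise it is meant for humans, e.g.:
#
#   ERROR:ENODESSH:node:node3.example.com:ssh communication with node ...
#   ERROR: node node3.example.com: ssh communication with node ...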
1997 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1998 """Log an error message if the passed condition is True.
2002 or self.op.debug_simulate_errors) # pylint: disable=E1101
2004 # If the error code is in the list of ignored errors, demote the error to a warning.
2006 (_, etxt, _) = ecode
2007 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2008 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
2011 self._Error(ecode, *args, **kwargs)
2013 # do not mark the operation as failed for WARN cases only
2014 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
2015 self.bad = self.bad or cond
2018 class LUClusterVerify(NoHooksLU):
2019 """Submits all jobs necessary to verify the cluster.
2024 def ExpandNames(self):
2025 self.needed_locks = {}
2027 def Exec(self, feedback_fn):
2030 if self.op.group_name:
2031 groups = [self.op.group_name]
2032 depends_fn = lambda: None
2034 groups = self.cfg.GetNodeGroupList()
2036 # Verify global configuration
2038 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2041 # Always depend on global verification
2042 depends_fn = lambda: [(-len(jobs), [])]
2045 [opcodes.OpClusterVerifyGroup(group_name=group,
2046 ignore_errors=self.op.ignore_errors,
2047 depends=depends_fn())]
2048 for group in groups)
2050 # Fix up all parameters
2051 for op in itertools.chain(*jobs): # pylint: disable=W0142
2052 op.debug_simulate_errors = self.op.debug_simulate_errors
2053 op.verbose = self.op.verbose
2054 op.error_codes = self.op.error_codes
2056 op.skip_checks = self.op.skip_checks
2057 except AttributeError:
2058 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2060 return ResultWithJobs(jobs)
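# Illustrative sketch (assumption): for a cluster with two node groups the
# Exec() above ends up submitting something like
#
#   jobs = [[opcodes.OpClusterVerifyConfig(...)],
#           [opcodes.OpClusterVerifyGroup(group_name="group1",
#                                         depends=[(-1, [])])],
#           [opcodes.OpClusterVerifyGroup(group_name="group2",
#                                         depends=[(-2, [])])]]
#
# where each relative dependency points back at the global configuration
# check, so group verification only starts once that job has finished.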
2063 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2064 """Verifies the cluster config.
2069 def _VerifyHVP(self, hvp_data):
2070 """Verifies locally the syntax of the hypervisor parameters.
2073 for item, hv_name, hv_params in hvp_data:
2074 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2077 hv_class = hypervisor.GetHypervisor(hv_name)
2078 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2079 hv_class.CheckParameterSyntax(hv_params)
2080 except errors.GenericError, err:
2081 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2083 def ExpandNames(self):
2084 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2085 self.share_locks = _ShareAll()
2087 def CheckPrereq(self):
2088 """Check prerequisites.
2091 # Retrieve all information
2092 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2093 self.all_node_info = self.cfg.GetAllNodesInfo()
2094 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2096 def Exec(self, feedback_fn):
2097 """Verify integrity of cluster, performing various test on nodes.
2101 self._feedback_fn = feedback_fn
2103 feedback_fn("* Verifying cluster config")
2105 for msg in self.cfg.VerifyConfig():
2106 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2108 feedback_fn("* Verifying cluster certificate files")
2110 for cert_filename in pathutils.ALL_CERT_FILES:
2111 (errcode, msg) = _VerifyCertificate(cert_filename)
2112 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2114 feedback_fn("* Verifying hypervisor parameters")
2116 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2117 self.all_inst_info.values()))
2119 feedback_fn("* Verifying all nodes belong to an existing group")
2121 # We do this verification here because, should this bogus circumstance
2122 # occur, it would never be caught by VerifyGroup, which only acts on
2123 # nodes/instances reachable from existing node groups.
2125 dangling_nodes = set(node.name for node in self.all_node_info.values()
2126 if node.group not in self.all_group_info)
2128 dangling_instances = {}
2129 no_node_instances = []
2131 for inst in self.all_inst_info.values():
2132 if inst.primary_node in dangling_nodes:
2133 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2134 elif inst.primary_node not in self.all_node_info:
2135 no_node_instances.append(inst.name)
2140 utils.CommaJoin(dangling_instances.get(node.name,
2142 for node in dangling_nodes]
2144 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2146 "the following nodes (and their instances) belong to a non"
2147 " existing group: %s", utils.CommaJoin(pretty_dangling))
2149 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2151 "the following instances have a non-existing primary-node:"
2152 " %s", utils.CommaJoin(no_node_instances))
2157 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2158 """Verifies the status of a node group.
2161 HPATH = "cluster-verify"
2162 HTYPE = constants.HTYPE_CLUSTER
2165 _HOOKS_INDENT_RE = re.compile("^", re.M)
2167 class NodeImage(object):
2168 """A class representing the logical and physical status of a node.
2171 @ivar name: the node name to which this object refers
2172 @ivar volumes: a structure as returned from
2173 L{ganeti.backend.GetVolumeList} (runtime)
2174 @ivar instances: a list of running instances (runtime)
2175 @ivar pinst: list of configured primary instances (config)
2176 @ivar sinst: list of configured secondary instances (config)
2177 @ivar sbp: dictionary of {primary-node: list of instances} for all
2178 instances for which this node is secondary (config)
2179 @ivar mfree: free memory, as reported by hypervisor (runtime)
2180 @ivar dfree: free disk, as reported by the node (runtime)
2181 @ivar offline: the offline status (config)
2182 @type rpc_fail: boolean
2183 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2184 not whether the individual keys were correct) (runtime)
2185 @type lvm_fail: boolean
2186 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2187 @type hyp_fail: boolean
2188 @ivar hyp_fail: whether the RPC call didn't return the instance list
2189 @type ghost: boolean
2190 @ivar ghost: whether this is a known node or not (config)
2191 @type os_fail: boolean
2192 @ivar os_fail: whether the RPC call didn't return valid OS data
2194 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2195 @type vm_capable: boolean
2196 @ivar vm_capable: whether the node can host instances
2199 def __init__(self, offline=False, name=None, vm_capable=True):
2208 self.offline = offline
2209 self.vm_capable = vm_capable
2210 self.rpc_fail = False
2211 self.lvm_fail = False
2212 self.hyp_fail = False
2214 self.os_fail = False
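# Illustrative usage (mirrors the code further below in Exec): NodeImage
# objects start out with only configuration-derived fields set; the runtime
# fields (volumes, instances, mfree, dfree, oslist, ...) are filled in later
# by the _UpdateNode* helpers, e.g.:
#
#   nimg = self.NodeImage(offline=node.offline, name=node.name,
#                         vm_capable=node.vm_capable)
#   ...
#   self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
#   self._UpdateNodeInstances(node_i, nresult, nimg)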
2217 def ExpandNames(self):
2218 # This raises errors.OpPrereqError on its own:
2219 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2221 # Get instances in node group; this is unsafe and needs verification later
2223 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2225 self.needed_locks = {
2226 locking.LEVEL_INSTANCE: inst_names,
2227 locking.LEVEL_NODEGROUP: [self.group_uuid],
2228 locking.LEVEL_NODE: [],
2231 self.share_locks = _ShareAll()
2233 def DeclareLocks(self, level):
2234 if level == locking.LEVEL_NODE:
2235 # Get members of node group; this is unsafe and needs verification later
2236 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2238 all_inst_info = self.cfg.GetAllInstancesInfo()
2240 # In Exec(), we warn about mirrored instances that have primary and
2241 # secondary living in separate node groups. To fully verify that
2242 # volumes for these instances are healthy, we will need to do an
2243 extra call to their secondaries. We ensure here those nodes will be locked.
2245 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2246 # Important: access only the instances whose lock is owned
2247 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2248 nodes.update(all_inst_info[inst].secondary_nodes)
2250 self.needed_locks[locking.LEVEL_NODE] = nodes
2252 def CheckPrereq(self):
2253 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2254 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2256 group_nodes = set(self.group_info.members)
2258 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2261 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2263 unlocked_instances = \
2264 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2267 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2268 utils.CommaJoin(unlocked_nodes),
2271 if unlocked_instances:
2272 raise errors.OpPrereqError("Missing lock for instances: %s" %
2273 utils.CommaJoin(unlocked_instances),
2276 self.all_node_info = self.cfg.GetAllNodesInfo()
2277 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2279 self.my_node_names = utils.NiceSort(group_nodes)
2280 self.my_inst_names = utils.NiceSort(group_instances)
2282 self.my_node_info = dict((name, self.all_node_info[name])
2283 for name in self.my_node_names)
2285 self.my_inst_info = dict((name, self.all_inst_info[name])
2286 for name in self.my_inst_names)
2288 # We detect here the nodes that will need the extra RPC calls for verifying
2289 # split LV volumes; they should be locked.
2290 extra_lv_nodes = set()
2292 for inst in self.my_inst_info.values():
2293 if inst.disk_template in constants.DTS_INT_MIRROR:
2294 for nname in inst.all_nodes:
2295 if self.all_node_info[nname].group != self.group_uuid:
2296 extra_lv_nodes.add(nname)
2298 unlocked_lv_nodes = \
2299 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2301 if unlocked_lv_nodes:
2302 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2303 utils.CommaJoin(unlocked_lv_nodes),
2305 self.extra_lv_nodes = list(extra_lv_nodes)
2307 def _VerifyNode(self, ninfo, nresult):
2308 """Perform some basic validation on data returned from a node.
2310 - check the result data structure is well formed and has all the
2312 - check ganeti version
2314 @type ninfo: L{objects.Node}
2315 @param ninfo: the node to check
2316 @param nresult: the results from the node
2318 @return: whether overall this call was successful (and we can expect
2319 reasonable values in the response)
2323 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2325 # main result, nresult should be a non-empty dict
2326 test = not nresult or not isinstance(nresult, dict)
2327 _ErrorIf(test, constants.CV_ENODERPC, node,
2328 "unable to verify node: no data returned")
2332 # compares ganeti version
2333 local_version = constants.PROTOCOL_VERSION
2334 remote_version = nresult.get("version", None)
2335 test = not (remote_version and
2336 isinstance(remote_version, (list, tuple)) and
2337 len(remote_version) == 2)
2338 _ErrorIf(test, constants.CV_ENODERPC, node,
2339 "connection to node returned invalid data")
2343 test = local_version != remote_version[0]
2344 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2345 "incompatible protocol versions: master %s,"
2346 " node %s", local_version, remote_version[0])
2350 # node seems compatible, we can actually try to look into its results
2352 # full package version
2353 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2354 constants.CV_ENODEVERSION, node,
2355 "software version mismatch: master %s, node %s",
2356 constants.RELEASE_VERSION, remote_version[1],
2357 code=self.ETYPE_WARNING)
2359 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2360 if ninfo.vm_capable and isinstance(hyp_result, dict):
2361 for hv_name, hv_result in hyp_result.iteritems():
2362 test = hv_result is not None
2363 _ErrorIf(test, constants.CV_ENODEHV, node,
2364 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2366 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2367 if ninfo.vm_capable and isinstance(hvp_result, list):
2368 for item, hv_name, hv_result in hvp_result:
2369 _ErrorIf(True, constants.CV_ENODEHV, node,
2370 "hypervisor %s parameter verify failure (source %s): %s",
2371 hv_name, item, hv_result)
2373 test = nresult.get(constants.NV_NODESETUP,
2374 ["Missing NODESETUP results"])
2375 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2380 def _VerifyNodeTime(self, ninfo, nresult,
2381 nvinfo_starttime, nvinfo_endtime):
2382 """Check the node time.
2384 @type ninfo: L{objects.Node}
2385 @param ninfo: the node to check
2386 @param nresult: the remote results for the node
2387 @param nvinfo_starttime: the start time of the RPC call
2388 @param nvinfo_endtime: the end time of the RPC call
2392 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2394 ntime = nresult.get(constants.NV_TIME, None)
2396 ntime_merged = utils.MergeTime(ntime)
2397 except (ValueError, TypeError):
2398 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2401 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2402 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2403 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2404 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2408 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2409 "Node time diverges by at least %s from master node time",
2412 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2413 """Check the node LVM results.
2415 @type ninfo: L{objects.Node}
2416 @param ninfo: the node to check
2417 @param nresult: the remote results for the node
2418 @param vg_name: the configured VG name
2425 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2427 # checks vg existence and size > 20G
2428 vglist = nresult.get(constants.NV_VGLIST, None)
2430 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2432 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2433 constants.MIN_VG_SIZE)
2434 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2437 pvlist = nresult.get(constants.NV_PVLIST, None)
2438 test = pvlist is None
2439 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2441 # check that ':' is not present in PV names, since it's a
2442 # special character for lvcreate (denotes the range of PEs to use on the PV)
2444 for _, pvname, owner_vg in pvlist:
2445 test = ":" in pvname
2446 _ErrorIf(test, constants.CV_ENODELVM, node,
2447 "Invalid character ':' in PV '%s' of VG '%s'",
2450 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2451 """Check the node bridges.
2453 @type ninfo: L{objects.Node}
2454 @param ninfo: the node to check
2455 @param nresult: the remote results for the node
2456 @param bridges: the expected list of bridges
2463 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2465 missing = nresult.get(constants.NV_BRIDGES, None)
2466 test = not isinstance(missing, list)
2467 _ErrorIf(test, constants.CV_ENODENET, node,
2468 "did not return valid bridge information")
2470 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2471 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2473 def _VerifyNodeUserScripts(self, ninfo, nresult):
2474 """Check the results of user scripts presence and executability on the node
2476 @type ninfo: L{objects.Node}
2477 @param ninfo: the node to check
2478 @param nresult: the remote results for the node
2483 test = not constants.NV_USERSCRIPTS in nresult
2484 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2485 "did not return user scripts information")
2487 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2489 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2490 "user scripts not present or not executable: %s" %
2491 utils.CommaJoin(sorted(broken_scripts)))
2493 def _VerifyNodeNetwork(self, ninfo, nresult):
2494 """Check the node network connectivity results.
2496 @type ninfo: L{objects.Node}
2497 @param ninfo: the node to check
2498 @param nresult: the remote results for the node
2502 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2504 test = constants.NV_NODELIST not in nresult
2505 _ErrorIf(test, constants.CV_ENODESSH, node,
2506 "node hasn't returned node ssh connectivity data")
2508 if nresult[constants.NV_NODELIST]:
2509 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2510 _ErrorIf(True, constants.CV_ENODESSH, node,
2511 "ssh communication with node '%s': %s", a_node, a_msg)
2513 test = constants.NV_NODENETTEST not in nresult
2514 _ErrorIf(test, constants.CV_ENODENET, node,
2515 "node hasn't returned node tcp connectivity data")
2517 if nresult[constants.NV_NODENETTEST]:
2518 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2520 _ErrorIf(True, constants.CV_ENODENET, node,
2521 "tcp communication with node '%s': %s",
2522 anode, nresult[constants.NV_NODENETTEST][anode])
2524 test = constants.NV_MASTERIP not in nresult
2525 _ErrorIf(test, constants.CV_ENODENET, node,
2526 "node hasn't returned node master IP reachability data")
2528 if not nresult[constants.NV_MASTERIP]:
2529 if node == self.master_node:
2530 msg = "the master node cannot reach the master IP (not configured?)"
2532 msg = "cannot reach the master IP"
2533 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2535 def _VerifyInstance(self, instance, instanceconfig, node_image,
2537 """Verify an instance.
2539 This function checks whether the required block devices are
2540 available on the instance's node.
2543 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2544 node_current = instanceconfig.primary_node
2546 node_vol_should = {}
2547 instanceconfig.MapLVsByNode(node_vol_should)
2549 cluster = self.cfg.GetClusterInfo()
2550 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2552 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2553 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2555 for node in node_vol_should:
2556 n_img = node_image[node]
2557 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2558 # ignore missing volumes on offline or broken nodes
2560 for volume in node_vol_should[node]:
2561 test = volume not in n_img.volumes
2562 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2563 "volume %s missing on node %s", volume, node)
2565 if instanceconfig.admin_state == constants.ADMINST_UP:
2566 pri_img = node_image[node_current]
2567 test = instance not in pri_img.instances and not pri_img.offline
2568 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2569 "instance not running on its primary node %s",
2572 diskdata = [(nname, success, status, idx)
2573 for (nname, disks) in diskstatus.items()
2574 for idx, (success, status) in enumerate(disks)]
2576 for nname, success, bdev_status, idx in diskdata:
2577 # the 'ghost node' construction in Exec() ensures that we have a
2579 snode = node_image[nname]
2580 bad_snode = snode.ghost or snode.offline
2581 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2582 not success and not bad_snode,
2583 constants.CV_EINSTANCEFAULTYDISK, instance,
2584 "couldn't retrieve status for disk/%s on %s: %s",
2585 idx, nname, bdev_status)
2586 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2587 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2588 constants.CV_EINSTANCEFAULTYDISK, instance,
2589 "disk/%s on %s is faulty", idx, nname)
2591 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2592 """Verify if there are any unknown volumes in the cluster.
2594 The .os, .swap and backup volumes are ignored. All other volumes are
2595 reported as unknown.
2597 @type reserved: L{ganeti.utils.FieldSet}
2598 @param reserved: a FieldSet of reserved volume names
2601 for node, n_img in node_image.items():
2602 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2603 self.all_node_info[node].group != self.group_uuid):
2604 # skip non-healthy nodes
2606 for volume in n_img.volumes:
2607 test = ((node not in node_vol_should or
2608 volume not in node_vol_should[node]) and
2609 not reserved.Matches(volume))
2610 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2611 "volume %s is unknown", volume)
2613 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2614 """Verify N+1 Memory Resilience.
2616 Check that if one single node dies we can still start all the
2617 instances it was primary for.
2620 cluster_info = self.cfg.GetClusterInfo()
2621 for node, n_img in node_image.items():
2622 # This code checks that every node which is now listed as
2623 # secondary has enough memory to host all instances it is
2624 # supposed to host, should a single other node in the cluster fail.
2625 # FIXME: not ready for failover to an arbitrary node
2626 # FIXME: does not support file-backed instances
2627 # WARNING: we currently take into account down instances as well
2628 # as up ones, considering that even if they're down someone
2629 # might want to start them even in the event of a node failure.
2630 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2631 # we're skipping nodes marked offline and nodes in other groups from
2632 # the N+1 warning, since most likely we don't have good memory
2633 # information from them; we already list instances living on such
2634 # nodes, and that's enough warning
2636 #TODO(dynmem): also consider ballooning out other instances
2637 for prinode, instances in n_img.sbp.items():
2639 for instance in instances:
2640 bep = cluster_info.FillBE(instance_cfg[instance])
2641 if bep[constants.BE_AUTO_BALANCE]:
2642 needed_mem += bep[constants.BE_MINMEM]
2643 test = n_img.mfree < needed_mem
2644 self._ErrorIf(test, constants.CV_ENODEN1, node,
2645 "not enough memory to accomodate instance failovers"
2646 " should node %s fail (%dMiB needed, %dMiB available)",
2647 prinode, needed_mem, n_img.mfree)
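# Worked example (illustrative numbers): if this node is secondary for three
# auto-balanced instances whose primary is node B, with BE_MINMEM values of
# 1024, 2048 and 512 MiB, then needed_mem for the (B, this node) pair is
# 3584 MiB; an mfree of, say, 3000 MiB would trigger the CV_ENODEN1 error
# above.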
2650 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2651 (files_all, files_opt, files_mc, files_vm)):
2652 """Verifies file checksums collected from all nodes.
2654 @param errorif: Callback for reporting errors
2655 @param nodeinfo: List of L{objects.Node} objects
2656 @param master_node: Name of master node
2657 @param all_nvinfo: RPC results
2660 # Define functions determining which nodes to consider for a file
2663 (files_mc, lambda node: (node.master_candidate or
2664 node.name == master_node)),
2665 (files_vm, lambda node: node.vm_capable),
2668 # Build mapping from filename to list of nodes which should have the file
2670 for (files, fn) in files2nodefn:
2672 filenodes = nodeinfo
2674 filenodes = filter(fn, nodeinfo)
2675 nodefiles.update((filename,
2676 frozenset(map(operator.attrgetter("name"), filenodes)))
2677 for filename in files)
2679 assert set(nodefiles) == (files_all | files_mc | files_vm)
2681 fileinfo = dict((filename, {}) for filename in nodefiles)
2682 ignore_nodes = set()
2684 for node in nodeinfo:
2686 ignore_nodes.add(node.name)
2689 nresult = all_nvinfo[node.name]
2691 if nresult.fail_msg or not nresult.payload:
2694 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2695 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2696 for (key, value) in fingerprints.items())
2699 test = not (node_files and isinstance(node_files, dict))
2700 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2701 "Node did not return file checksum data")
2703 ignore_nodes.add(node.name)
2706 # Build per-checksum mapping from filename to nodes having it
2707 for (filename, checksum) in node_files.items():
2708 assert filename in nodefiles
2709 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2711 for (filename, checksums) in fileinfo.items():
2712 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2714 # Nodes having the file
2715 with_file = frozenset(node_name
2716 for nodes in fileinfo[filename].values()
2717 for node_name in nodes) - ignore_nodes
2719 expected_nodes = nodefiles[filename] - ignore_nodes
2721 # Nodes missing file
2722 missing_file = expected_nodes - with_file
2724 if filename in files_opt:
2726 errorif(missing_file and missing_file != expected_nodes,
2727 constants.CV_ECLUSTERFILECHECK, None,
2728 "File %s is optional, but it must exist on all or no"
2729 " nodes (not found on %s)",
2730 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2732 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2733 "File %s is missing from node(s) %s", filename,
2734 utils.CommaJoin(utils.NiceSort(missing_file)))
2736 # Warn if a node has a file it shouldn't
2737 unexpected = with_file - expected_nodes
2739 constants.CV_ECLUSTERFILECHECK, None,
2740 "File %s should not exist on node(s) %s",
2741 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2743 # See if there are multiple versions of the file
2744 test = len(checksums) > 1
2746 variants = ["variant %s on %s" %
2747 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2748 for (idx, (checksum, nodes)) in
2749 enumerate(sorted(checksums.items()))]
2753 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2754 "File %s found with %s different checksums (%s)",
2755 filename, len(checksums), "; ".join(variants))
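# Illustrative example (data assumed): fileinfo maps each filename to a dict
# of {checksum: set(node names)}; two divergent copies of one file would show
# up as
#
#   {"0123456789ab...": set(["node1", "node2"]),
#    "ba9876543210...": set(["node3"])}
#
# and be reported as found with 2 different checksums ("variant 1 on
# node1, node2; variant 2 on node3").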
2757 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2759 """Verifies and the node DRBD status.
2761 @type ninfo: L{objects.Node}
2762 @param ninfo: the node to check
2763 @param nresult: the remote results for the node
2764 @param instanceinfo: the dict of instances
2765 @param drbd_helper: the configured DRBD usermode helper
2766 @param drbd_map: the DRBD map as returned by
2767 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2771 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2774 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2775 test = (helper_result is None)
2776 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2777 "no drbd usermode helper returned")
2779 status, payload = helper_result
2781 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2782 "drbd usermode helper check unsuccessful: %s", payload)
2783 test = status and (payload != drbd_helper)
2784 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2785 "wrong drbd usermode helper: %s", payload)
2787 # compute the DRBD minors
2789 for minor, instance in drbd_map[node].items():
2790 test = instance not in instanceinfo
2791 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2792 "ghost instance '%s' in temporary DRBD map", instance)
2793 # ghost instance should not be running, but otherwise we
2794 # don't give double warnings (both ghost instance and
2795 # unallocated minor in use)
2797 node_drbd[minor] = (instance, False)
2799 instance = instanceinfo[instance]
2800 node_drbd[minor] = (instance.name,
2801 instance.admin_state == constants.ADMINST_UP)
2803 # and now check them
2804 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2805 test = not isinstance(used_minors, (tuple, list))
2806 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2807 "cannot parse drbd status file: %s", str(used_minors))
2809 # we cannot check drbd status
2812 for minor, (iname, must_exist) in node_drbd.items():
2813 test = minor not in used_minors and must_exist
2814 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2815 "drbd minor %d of instance %s is not active", minor, iname)
2816 for minor in used_minors:
2817 test = minor not in node_drbd
2818 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2819 "unallocated drbd minor %d is in use", minor)
2821 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2822 """Builds the node OS structures.
2824 @type ninfo: L{objects.Node}
2825 @param ninfo: the node to check
2826 @param nresult: the remote results for the node
2827 @param nimg: the node image object
2831 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2833 remote_os = nresult.get(constants.NV_OSLIST, None)
2834 test = (not isinstance(remote_os, list) or
2835 not compat.all(isinstance(v, list) and len(v) == 7
2836 for v in remote_os))
2838 _ErrorIf(test, constants.CV_ENODEOS, node,
2839 "node hasn't returned valid OS data")
2848 for (name, os_path, status, diagnose,
2849 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2851 if name not in os_dict:
2854 # parameters is a list of lists instead of list of tuples due to
2855 # JSON lacking a real tuple type, fix it:
2856 parameters = [tuple(v) for v in parameters]
2857 os_dict[name].append((os_path, status, diagnose,
2858 set(variants), set(parameters), set(api_ver)))
2860 nimg.oslist = os_dict
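# Illustrative example (values assumed): after this call nimg.oslist maps OS
# names to lists of (path, status, diagnose, variants, parameters,
# api_versions) tuples, e.g.:
#
#   {"debian-image": [("/srv/ganeti/os/debian-image", True, "",
#                      set(["default"]), set(), set([20]))]}
#
# A duplicate OS name would simply add a second tuple to the list, which
# _VerifyNodeOS below reports as shadowing.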
2862 def _VerifyNodeOS(self, ninfo, nimg, base):
2863 """Verifies the node OS list.
2865 @type ninfo: L{objects.Node}
2866 @param ninfo: the node to check
2867 @param nimg: the node image object
2868 @param base: the 'template' node we match against (e.g. from the master)
2872 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2874 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2876 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2877 for os_name, os_data in nimg.oslist.items():
2878 assert os_data, "Empty OS status for OS %s?!" % os_name
2879 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2880 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2881 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2882 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2883 "OS '%s' has multiple entries (first one shadows the rest): %s",
2884 os_name, utils.CommaJoin([v[0] for v in os_data]))
2885 # comparisons with the 'base' image
2886 test = os_name not in base.oslist
2887 _ErrorIf(test, constants.CV_ENODEOS, node,
2888 "Extra OS %s not present on reference node (%s)",
2892 assert base.oslist[os_name], "Base node has empty OS status?"
2893 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2895 # base OS is invalid, skipping
2897 for kind, a, b in [("API version", f_api, b_api),
2898 ("variants list", f_var, b_var),
2899 ("parameters", beautify_params(f_param),
2900 beautify_params(b_param))]:
2901 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2902 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2903 kind, os_name, base.name,
2904 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2906 # check any missing OSes
2907 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2908 _ErrorIf(missing, constants.CV_ENODEOS, node,
2909 "OSes present on reference node %s but missing on this node: %s",
2910 base.name, utils.CommaJoin(missing))
2912 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2913 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2915 @type ninfo: L{objects.Node}
2916 @param ninfo: the node to check
2917 @param nresult: the remote results for the node
2918 @type is_master: bool
2919 @param is_master: Whether node is the master node
2925 (constants.ENABLE_FILE_STORAGE or
2926 constants.ENABLE_SHARED_FILE_STORAGE)):
2928 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2930 # This should never happen
2931 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2932 "Node did not return forbidden file storage paths")
2934 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2935 "Found forbidden file storage paths: %s",
2936 utils.CommaJoin(fspaths))
2938 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2939 constants.CV_ENODEFILESTORAGEPATHS, node,
2940 "Node should not have returned forbidden file storage"
2943 def _VerifyOob(self, ninfo, nresult):
2944 """Verifies out of band functionality of a node.
2946 @type ninfo: L{objects.Node}
2947 @param ninfo: the node to check
2948 @param nresult: the remote results for the node
2952 # We just have to verify the paths on master and/or master candidates
2953 # as the oob helper is invoked on the master
2954 if ((ninfo.master_candidate or ninfo.master_capable) and
2955 constants.NV_OOB_PATHS in nresult):
2956 for path_result in nresult[constants.NV_OOB_PATHS]:
2957 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2959 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2960 """Verifies and updates the node volume data.
2962 This function will update a L{NodeImage}'s internal structures
2963 with data from the remote call.
2965 @type ninfo: L{objects.Node}
2966 @param ninfo: the node to check
2967 @param nresult: the remote results for the node
2968 @param nimg: the node image object
2969 @param vg_name: the configured VG name
2973 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2975 nimg.lvm_fail = True
2976 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2979 elif isinstance(lvdata, basestring):
2980 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2981 utils.SafeEncode(lvdata))
2982 elif not isinstance(lvdata, dict):
2983 _ErrorIf(True, constants.CV_ENODELVM, node,
2984 "rpc call to node failed (lvlist)")
2986 nimg.volumes = lvdata
2987 nimg.lvm_fail = False
2989 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2990 """Verifies and updates the node instance list.
2992 If the listing was successful, then updates this node's instance
2993 list. Otherwise, it marks the RPC call as failed for the instance list.
2996 @type ninfo: L{objects.Node}
2997 @param ninfo: the node to check
2998 @param nresult: the remote results for the node
2999 @param nimg: the node image object
3002 idata = nresult.get(constants.NV_INSTANCELIST, None)
3003 test = not isinstance(idata, list)
3004 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3005 "rpc call to node failed (instancelist): %s",
3006 utils.SafeEncode(str(idata)))
3008 nimg.hyp_fail = True
3010 nimg.instances = idata
3012 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3013 """Verifies and computes a node information map
3015 @type ninfo: L{objects.Node}
3016 @param ninfo: the node to check
3017 @param nresult: the remote results for the node
3018 @param nimg: the node image object
3019 @param vg_name: the configured VG name
3023 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3025 # try to read free memory (from the hypervisor)
3026 hv_info = nresult.get(constants.NV_HVINFO, None)
3027 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3028 _ErrorIf(test, constants.CV_ENODEHV, node,
3029 "rpc call to node failed (hvinfo)")
3032 nimg.mfree = int(hv_info["memory_free"])
3033 except (ValueError, TypeError):
3034 _ErrorIf(True, constants.CV_ENODERPC, node,
3035 "node returned invalid nodeinfo, check hypervisor")
3037 # FIXME: devise a free space model for file based instances as well
3038 if vg_name is not None:
3039 test = (constants.NV_VGLIST not in nresult or
3040 vg_name not in nresult[constants.NV_VGLIST])
3041 _ErrorIf(test, constants.CV_ENODELVM, node,
3042 "node didn't return data for the volume group '%s'"
3043 " - it is either missing or broken", vg_name)
3046 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3047 except (ValueError, TypeError):
3048 _ErrorIf(True, constants.CV_ENODERPC, node,
3049 "node returned invalid LVM info, check LVM status")
3051 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3052 """Gets per-disk status information for all instances.
3054 @type nodelist: list of strings
3055 @param nodelist: Node names
3056 @type node_image: dict of (name, L{objects.Node})
3057 @param node_image: Node objects
3058 @type instanceinfo: dict of (name, L{objects.Instance})
3059 @param instanceinfo: Instance objects
3060 @rtype: {instance: {node: [(success, payload)]}}
3061 @return: a dictionary of per-instance dictionaries with nodes as
3062 keys and disk information as values; the disk information is a
3063 list of tuples (success, payload)
3066 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3069 node_disks_devonly = {}
3070 diskless_instances = set()
3071 diskless = constants.DT_DISKLESS
3073 for nname in nodelist:
3074 node_instances = list(itertools.chain(node_image[nname].pinst,
3075 node_image[nname].sinst))
3076 diskless_instances.update(inst for inst in node_instances
3077 if instanceinfo[inst].disk_template == diskless)
3078 disks = [(inst, disk)
3079 for inst in node_instances
3080 for disk in instanceinfo[inst].disks]
3083 # No need to collect data
3086 node_disks[nname] = disks
3088 # _AnnotateDiskParams makes already copies of the disks
3090 for (inst, dev) in disks:
3091 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3092 self.cfg.SetDiskID(anno_disk, nname)
3093 devonly.append(anno_disk)
3095 node_disks_devonly[nname] = devonly
3097 assert len(node_disks) == len(node_disks_devonly)
3099 # Collect data from all nodes with disks
3100 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3103 assert len(result) == len(node_disks)
3107 for (nname, nres) in result.items():
3108 disks = node_disks[nname]
3111 # No data from this node
3112 data = len(disks) * [(False, "node offline")]
3115 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3116 "while getting disk information: %s", msg)
3118 # No data from this node
3119 data = len(disks) * [(False, msg)]
3122 for idx, i in enumerate(nres.payload):
3123 if isinstance(i, (tuple, list)) and len(i) == 2:
3126 logging.warning("Invalid result from node %s, entry %d: %s",
3128 data.append((False, "Invalid result from the remote node"))
3130 for ((inst, _), status) in zip(disks, data):
3131 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3133 # Add empty entries for diskless instances.
3134 for inst in diskless_instances:
3135 assert inst not in instdisk
3138 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3139 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3140 compat.all(isinstance(s, (tuple, list)) and
3141 len(s) == 2 for s in statuses)
3142 for inst, nnames in instdisk.items()
3143 for nname, statuses in nnames.items())
3144 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
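# Illustrative example (shape follows the @rtype above, values assumed): the
# instdisk structure is nested as {instance: {node: [(success, payload)]}},
# e.g.:
#
#   {"inst1.example.com": {"node1": [(True, status_disk0), (True, status_disk1)],
#                          "node2": [(False, "node offline")]}}
#
# Diskless instances end up with an empty inner mapping so callers can still
# look them up.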
3149 def _SshNodeSelector(group_uuid, all_nodes):
3150 """Create endless iterators for all potential SSH check hosts.
3153 nodes = [node for node in all_nodes
3154 if (node.group != group_uuid and
3156 keyfunc = operator.attrgetter("group")
3158 return map(itertools.cycle,
3159 [sorted(map(operator.attrgetter("name"), names))
3160 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3164 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3165 """Choose which nodes should talk to which other nodes.
3167 We will make nodes contact all nodes in their group, and one node from
3170 @warning: This algorithm has a known issue if one node group is much
3171 smaller than others (e.g. just one node). In such a case all other
3172 nodes will talk to the single node.
3175 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3176 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3178 return (online_nodes,
3179 dict((name, sorted([i.next() for i in sel]))
3180 for name in online_nodes))
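# Illustrative example (assumption): for a group whose online nodes are n1
# and n2, with a single other group containing n3 and n4, this could return
#
#   (["n1", "n2"], {"n1": ["n3"], "n2": ["n4"]})
#
# i.e. every online node additionally checks SSH connectivity to one node
# cycled in from each foreign group.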
3182 def BuildHooksEnv(self):
3185 Cluster-Verify hooks run only in the post phase; their failure is
3186 logged in the verify output and makes the verification fail.
3190 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3193 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3194 for node in self.my_node_info.values())
3198 def BuildHooksNodes(self):
3199 """Build hooks nodes.
3202 return ([], self.my_node_names)
3204 def Exec(self, feedback_fn):
3205 """Verify integrity of the node group, performing various test on nodes.
3208 # This method has too many local variables. pylint: disable=R0914
3209 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3211 if not self.my_node_names:
3213 feedback_fn("* Empty node group, skipping verification")
3217 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3218 verbose = self.op.verbose
3219 self._feedback_fn = feedback_fn
3221 vg_name = self.cfg.GetVGName()
3222 drbd_helper = self.cfg.GetDRBDHelper()
3223 cluster = self.cfg.GetClusterInfo()
3224 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3225 hypervisors = cluster.enabled_hypervisors
3226 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3228 i_non_redundant = [] # Non redundant instances
3229 i_non_a_balanced = [] # Non auto-balanced instances
3230 i_offline = 0 # Count of offline instances
3231 n_offline = 0 # Count of offline nodes
3232 n_drained = 0 # Count of nodes being drained
3233 node_vol_should = {}
3235 # FIXME: verify OS list
3238 filemap = _ComputeAncillaryFiles(cluster, False)
3240 # do local checksums
3241 master_node = self.master_node = self.cfg.GetMasterNode()
3242 master_ip = self.cfg.GetMasterIP()
3244 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3247 if self.cfg.GetUseExternalMipScript():
3248 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3250 node_verify_param = {
3251 constants.NV_FILELIST:
3252 map(vcluster.MakeVirtualPath,
3253 utils.UniqueSequence(filename
3254 for files in filemap
3255 for filename in files)),
3256 constants.NV_NODELIST:
3257 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3258 self.all_node_info.values()),
3259 constants.NV_HYPERVISOR: hypervisors,
3260 constants.NV_HVPARAMS:
3261 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3262 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3263 for node in node_data_list
3264 if not node.offline],
3265 constants.NV_INSTANCELIST: hypervisors,
3266 constants.NV_VERSION: None,
3267 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3268 constants.NV_NODESETUP: None,
3269 constants.NV_TIME: None,
3270 constants.NV_MASTERIP: (master_node, master_ip),
3271 constants.NV_OSLIST: None,
3272 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3273 constants.NV_USERSCRIPTS: user_scripts,
3276 if vg_name is not None:
3277 node_verify_param[constants.NV_VGLIST] = None
3278 node_verify_param[constants.NV_LVLIST] = vg_name
3279 node_verify_param[constants.NV_PVLIST] = [vg_name]
3282 node_verify_param[constants.NV_DRBDLIST] = None
3283 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3285 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3286 # Load file storage paths only from master node
3287 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3290 # FIXME: this needs to be changed per node-group, not cluster-wide
3292 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3293 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3294 bridges.add(default_nicpp[constants.NIC_LINK])
3295 for instance in self.my_inst_info.values():
3296 for nic in instance.nics:
3297 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3298 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3299 bridges.add(full_nic[constants.NIC_LINK])
3302 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3304 # Build our expected cluster state
3305 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3307 vm_capable=node.vm_capable))
3308 for node in node_data_list)
3312 for node in self.all_node_info.values():
3313 path = _SupportsOob(self.cfg, node)
3314 if path and path not in oob_paths:
3315 oob_paths.append(path)
3318 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3320 for instance in self.my_inst_names:
3321 inst_config = self.my_inst_info[instance]
3322 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3325 for nname in inst_config.all_nodes:
3326 if nname not in node_image:
3327 gnode = self.NodeImage(name=nname)
3328 gnode.ghost = (nname not in self.all_node_info)
3329 node_image[nname] = gnode
3331 inst_config.MapLVsByNode(node_vol_should)
3333 pnode = inst_config.primary_node
3334 node_image[pnode].pinst.append(instance)
3336 for snode in inst_config.secondary_nodes:
3337 nimg = node_image[snode]
3338 nimg.sinst.append(instance)
3339 if pnode not in nimg.sbp:
3340 nimg.sbp[pnode] = []
3341 nimg.sbp[pnode].append(instance)
3343 # At this point, we have the in-memory data structures complete,
3344 # except for the runtime information, which we'll gather next
3346 # Due to the way our RPC system works, exact response times cannot be
3347 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3348 # time before and after executing the request, we can at least have a time window.
3350 nvinfo_starttime = time.time()
3351 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3353 self.cfg.GetClusterName())
3354 nvinfo_endtime = time.time()
3356 if self.extra_lv_nodes and vg_name is not None:
3358 self.rpc.call_node_verify(self.extra_lv_nodes,
3359 {constants.NV_LVLIST: vg_name},
3360 self.cfg.GetClusterName())
3362 extra_lv_nvinfo = {}
3364 all_drbd_map = self.cfg.ComputeDRBDMap()
3366 feedback_fn("* Gathering disk information (%s nodes)" %
3367 len(self.my_node_names))
3368 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3371 feedback_fn("* Verifying configuration file consistency")
3373 # If not all nodes are being checked, we need to make sure the master node
3374 # and a non-checked vm_capable node are in the list.
3375 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3377 vf_nvinfo = all_nvinfo.copy()
3378 vf_node_info = list(self.my_node_info.values())
3379 additional_nodes = []
3380 if master_node not in self.my_node_info:
3381 additional_nodes.append(master_node)
3382 vf_node_info.append(self.all_node_info[master_node])
3383 # Add the first vm_capable node we find which is not included,
3384 # excluding the master node (which we already have)
3385 for node in absent_nodes:
3386 nodeinfo = self.all_node_info[node]
3387 if (nodeinfo.vm_capable and not nodeinfo.offline and
3388 node != master_node):
3389 additional_nodes.append(node)
3390 vf_node_info.append(self.all_node_info[node])
3392 key = constants.NV_FILELIST
3393 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3394 {key: node_verify_param[key]},
3395 self.cfg.GetClusterName()))
3397 vf_nvinfo = all_nvinfo
3398 vf_node_info = self.my_node_info.values()
3400 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3402 feedback_fn("* Verifying node status")
3406 for node_i in node_data_list:
3408 nimg = node_image[node]
3412 feedback_fn("* Skipping offline node %s" % (node,))
3416 if node == master_node:
3418 elif node_i.master_candidate:
3419 ntype = "master candidate"
3420 elif node_i.drained:
3426 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3428 msg = all_nvinfo[node].fail_msg
3429 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3432 nimg.rpc_fail = True
3435 nresult = all_nvinfo[node].payload
3437 nimg.call_ok = self._VerifyNode(node_i, nresult)
3438 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3439 self._VerifyNodeNetwork(node_i, nresult)
3440 self._VerifyNodeUserScripts(node_i, nresult)
3441 self._VerifyOob(node_i, nresult)
3442 self._VerifyFileStoragePaths(node_i, nresult,
3443 node == master_node)
3446 self._VerifyNodeLVM(node_i, nresult, vg_name)
3447 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3450 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3451 self._UpdateNodeInstances(node_i, nresult, nimg)
3452 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3453 self._UpdateNodeOS(node_i, nresult, nimg)
3455 if not nimg.os_fail:
3456 if refos_img is None:
3458 self._VerifyNodeOS(node_i, nimg, refos_img)
3459 self._VerifyNodeBridges(node_i, nresult, bridges)
3461 # Check whether all running instances are primary for the node. (This
3462 # can no longer be done from _VerifyInstance below, since some of the
3463 # wrong instances could be from other node groups.)
3464 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3466 for inst in non_primary_inst:
3467 test = inst in self.all_inst_info
3468 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3469 "instance should not run on node %s", node_i.name)
3470 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3471 "node is running unknown instance %s", inst)
3473 for node, result in extra_lv_nvinfo.items():
3474 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3475 node_image[node], vg_name)
3477 feedback_fn("* Verifying instance status")
3478 for instance in self.my_inst_names:
3480 feedback_fn("* Verifying instance %s" % instance)
3481 inst_config = self.my_inst_info[instance]
3482 self._VerifyInstance(instance, inst_config, node_image,
3484 inst_nodes_offline = []
3486 pnode = inst_config.primary_node
3487 pnode_img = node_image[pnode]
3488 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3489 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3490 " primary node failed", instance)
3492 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3494 constants.CV_EINSTANCEBADNODE, instance,
3495 "instance is marked as running and lives on offline node %s",
3496 inst_config.primary_node)
3498 # If the instance is non-redundant we cannot survive losing its primary
3499 # node, so we are not N+1 compliant.
3500 if inst_config.disk_template not in constants.DTS_MIRRORED:
3501 i_non_redundant.append(instance)
3503 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3504 constants.CV_EINSTANCELAYOUT,
3505 instance, "instance has multiple secondary nodes: %s",
3506 utils.CommaJoin(inst_config.secondary_nodes),
3507 code=self.ETYPE_WARNING)
3509 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3510 pnode = inst_config.primary_node
3511 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3512 instance_groups = {}
3514 for node in instance_nodes:
3515 instance_groups.setdefault(self.all_node_info[node].group,
3519 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3520 # Sort so that we always list the primary node first.
3521 for group, nodes in sorted(instance_groups.items(),
3522 key=lambda (_, nodes): pnode in nodes,
3525 self._ErrorIf(len(instance_groups) > 1,
3526 constants.CV_EINSTANCESPLITGROUPS,
3527 instance, "instance has primary and secondary nodes in"
3528 " different groups: %s", utils.CommaJoin(pretty_list),
3529 code=self.ETYPE_WARNING)
3531 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3532 i_non_a_balanced.append(instance)
3534 for snode in inst_config.secondary_nodes:
3535 s_img = node_image[snode]
3536 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3537 snode, "instance %s, connection to secondary node failed",
3541 inst_nodes_offline.append(snode)
3543 # warn that the instance lives on offline nodes
3544 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3545 "instance has offline secondary node(s) %s",
3546 utils.CommaJoin(inst_nodes_offline))
3547 # ... or ghost/non-vm_capable nodes
3548 for node in inst_config.all_nodes:
3549 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3550 instance, "instance lives on ghost node %s", node)
3551 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3552 instance, "instance lives on non-vm_capable node %s", node)
3554 feedback_fn("* Verifying orphan volumes")
3555 reserved = utils.FieldSet(*cluster.reserved_lvs)
3557 # We will get spurious "unknown volume" warnings if any node of this group
3558 # is secondary for an instance whose primary is in another group. To avoid
3559 # them, we find these instances and add their volumes to node_vol_should.
3560 for inst in self.all_inst_info.values():
3561 for secondary in inst.secondary_nodes:
3562 if (secondary in self.my_node_info
3563 and inst.name not in self.my_inst_info):
3564 inst.MapLVsByNode(node_vol_should)
3567 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3569 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3570 feedback_fn("* Verifying N+1 Memory redundancy")
3571 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3573 feedback_fn("* Other Notes")
3575 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3576 % len(i_non_redundant))
3578 if i_non_a_balanced:
3579 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3580 % len(i_non_a_balanced))
3583 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3586 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3589 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3593 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3594 """Analyze the post-hooks' result
3596 This method analyses the hook result, handles it, and sends some
3597 nicely-formatted feedback back to the user.
3599 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3600 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3601 @param hooks_results: the results of the multi-node hooks rpc call
3602 @param feedback_fn: function used to send feedback back to the caller
3603 @param lu_result: previous Exec result
3604 @return: the new Exec result, based on the previous result
3608 # We only really run POST phase hooks, only for non-empty groups,
3609 # and are only interested in their results
3610 if not self.my_node_names:
3613 elif phase == constants.HOOKS_PHASE_POST:
3614 # Used to change hooks' output to proper indentation
3615 feedback_fn("* Hooks Results")
3616 assert hooks_results, "invalid result from hooks"
3618 for node_name in hooks_results:
3619 res = hooks_results[node_name]
3621 test = msg and not res.offline
3622 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3623 "Communication failure in hooks execution: %s", msg)
3624 if res.offline or msg:
3625 # No need to investigate payload if node is offline or gave an error.
3628 for script, hkr, output in res.payload:
3629 test = hkr == constants.HKR_FAIL
3630 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3631 "Script %s failed, output:", script)
3633 output = self._HOOKS_INDENT_RE.sub(" ", output)
3634 feedback_fn("%s" % output)
3640 class LUClusterVerifyDisks(NoHooksLU):
3641 """Verifies the cluster disks status.
3646 def ExpandNames(self):
3647 self.share_locks = _ShareAll()
3648 self.needed_locks = {
3649 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3652 def Exec(self, feedback_fn):
3653 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3655 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3656 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3657 for group in group_names])
3660 class LUGroupVerifyDisks(NoHooksLU):
3661 """Verifies the status of all disks in a node group.
3666 def ExpandNames(self):
3667 # Raises errors.OpPrereqError on its own if group can't be found
3668 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3670 self.share_locks = _ShareAll()
3671 self.needed_locks = {
3672 locking.LEVEL_INSTANCE: [],
3673 locking.LEVEL_NODEGROUP: [],
3674 locking.LEVEL_NODE: [],
3677 def DeclareLocks(self, level):
3678 if level == locking.LEVEL_INSTANCE:
3679 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3681 # Lock instances optimistically, needs verification once node and group
3682 # locks have been acquired
3683 self.needed_locks[locking.LEVEL_INSTANCE] = \
3684 self.cfg.GetNodeGroupInstances(self.group_uuid)
3686 elif level == locking.LEVEL_NODEGROUP:
3687 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3689 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3690 set([self.group_uuid] +
3691 # Lock all groups used by instances optimistically; this requires
3692 # going via the node before it's locked, requiring verification
3695 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3696 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3698 elif level == locking.LEVEL_NODE:
3699 # This will only lock the nodes in the group to be verified which contain
3701 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3702 self._LockInstancesNodes()
3704 # Lock all nodes in group to be verified
3705 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3706 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3707 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3709 def CheckPrereq(self):
3710 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3711 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3712 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3714 assert self.group_uuid in owned_groups
3716 # Check if locked instances are still correct
3717 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3719 # Get instance information
3720 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3722 # Check if node groups for locked instances are still correct
3723 _CheckInstancesNodeGroups(self.cfg, self.instances,
3724 owned_groups, owned_nodes, self.group_uuid)
3726 def Exec(self, feedback_fn):
3727 """Verify integrity of cluster disks.
3729 @rtype: tuple of three items
3730 @return: a tuple of (dict of node-to-node_error, list of instances
3731 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3736 res_instances = set()
3739 nv_dict = _MapInstanceDisksToNodes(
3740 [inst for inst in self.instances.values()
3741 if inst.admin_state == constants.ADMINST_UP])
3744 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3745 set(self.cfg.GetVmCapableNodeList()))
3747 node_lvs = self.rpc.call_lv_list(nodes, [])
3749 for (node, node_res) in node_lvs.items():
3750 if node_res.offline:
3753 msg = node_res.fail_msg
3755 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3756 res_nodes[node] = msg
3759 for lv_name, (_, _, lv_online) in node_res.payload.items():
3760 inst = nv_dict.pop((node, lv_name), None)
3761 if not (lv_online or inst is None):
3762 res_instances.add(inst)
3764 # any leftover items in nv_dict are missing LVs, let's arrange the data
3766 for key, inst in nv_dict.iteritems():
3767 res_missing.setdefault(inst, []).append(list(key))
3769 return (res_nodes, list(res_instances), res_missing)
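# Sketch of the returned data (hypothetical names): res_nodes maps a node name
# to the RPC error it returned, res_instances lists instances that need
# "activate-disks", and res_missing maps an instance to its missing logical
# volumes, e.g. [["node1.example.com", "xenvg/disk0"]] for a volume no node
# reported as present.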
3772 class LUClusterRepairDiskSizes(NoHooksLU):
3773 """Verifies the cluster disks sizes.
3778 def ExpandNames(self):
3779 if self.op.instances:
3780 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3781 self.needed_locks = {
3782 locking.LEVEL_NODE_RES: [],
3783 locking.LEVEL_INSTANCE: self.wanted_names,
3785 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3787 self.wanted_names = None
3788 self.needed_locks = {
3789 locking.LEVEL_NODE_RES: locking.ALL_SET,
3790 locking.LEVEL_INSTANCE: locking.ALL_SET,
3792 self.share_locks = {
3793 locking.LEVEL_NODE_RES: 1,
3794 locking.LEVEL_INSTANCE: 0,
3797 def DeclareLocks(self, level):
3798 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3799 self._LockInstancesNodes(primary_only=True, level=level)
3801 def CheckPrereq(self):
3802 """Check prerequisites.
3804 This only checks the optional instance list against the existing names.
3807 if self.wanted_names is None:
3808 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3810 self.wanted_instances = \
3811 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3813 def _EnsureChildSizes(self, disk):
3814 """Ensure children of the disk have the needed disk size.
3816 This is valid mainly for DRBD8 and fixes an issue where the
3817 children have a smaller disk size than the parent.
3819 @param disk: an L{ganeti.objects.Disk} object
3822 if disk.dev_type == constants.LD_DRBD8:
3823 assert disk.children, "Empty children for DRBD8?"
3824 fchild = disk.children[0]
3825 mismatch = fchild.size < disk.size
3827 self.LogInfo("Child disk has size %d, parent %d, fixing",
3828 fchild.size, disk.size)
3829 fchild.size = disk.size
3831 # and we recurse on this child only, not on the metadev
3832 return self._EnsureChildSizes(fchild) or mismatch
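# _EnsureChildSizes returns True iff at least one child was grown, so the
# caller (Exec below) knows the instance object was modified and must be
# written back via self.cfg.Update().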
3836 def Exec(self, feedback_fn):
3837 """Verify the size of cluster disks.
3840 # TODO: check child disks too
3841 # TODO: check differences in size between primary/secondary nodes
3843 for instance in self.wanted_instances:
3844 pnode = instance.primary_node
3845 if pnode not in per_node_disks:
3846 per_node_disks[pnode] = []
3847 for idx, disk in enumerate(instance.disks):
3848 per_node_disks[pnode].append((instance, idx, disk))
3850 assert not (frozenset(per_node_disks.keys()) -
3851 self.owned_locks(locking.LEVEL_NODE_RES)), \
3852 "Not owning correct locks"
3853 assert not self.owned_locks(locking.LEVEL_NODE)
3856 for node, dskl in per_node_disks.items():
3857 newl = [v[2].Copy() for v in dskl]
3859 self.cfg.SetDiskID(dsk, node)
3860 result = self.rpc.call_blockdev_getsize(node, newl)
3862 self.LogWarning("Failure in blockdev_getsize call to node"
3863 " %s, ignoring", node)
3865 if len(result.payload) != len(dskl):
3866 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3867 " result.payload=%s", node, len(dskl), result.payload)
3868 self.LogWarning("Invalid result from node %s, ignoring node results",
3871 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3873 self.LogWarning("Disk %d of instance %s did not return size"
3874 " information, ignoring", idx, instance.name)
3876 if not isinstance(size, (int, long)):
3877 self.LogWarning("Disk %d of instance %s did not return valid"
3878 " size information, ignoring", idx, instance.name)
3881 if size != disk.size:
3882 self.LogInfo("Disk %d of instance %s has mismatched size,"
3883 " correcting: recorded %d, actual %d", idx,
3884 instance.name, disk.size, size)
3886 self.cfg.Update(instance, feedback_fn)
3887 changed.append((instance.name, idx, size))
3888 if self._EnsureChildSizes(disk):
3889 self.cfg.Update(instance, feedback_fn)
3890 changed.append((instance.name, idx, disk.size))
3894 class LUClusterRename(LogicalUnit):
3895 """Rename the cluster.
3898 HPATH = "cluster-rename"
3899 HTYPE = constants.HTYPE_CLUSTER
3901 def BuildHooksEnv(self):
3906 "OP_TARGET": self.cfg.GetClusterName(),
3907 "NEW_NAME": self.op.name,
3910 def BuildHooksNodes(self):
3911 """Build hooks nodes.
3914 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3916 def CheckPrereq(self):
3917 """Verify that the passed name is a valid one.
3920 hostname = netutils.GetHostname(name=self.op.name,
3921 family=self.cfg.GetPrimaryIPFamily())
3923 new_name = hostname.name
3924 self.ip = new_ip = hostname.ip
3925 old_name = self.cfg.GetClusterName()
3926 old_ip = self.cfg.GetMasterIP()
3927 if new_name == old_name and new_ip == old_ip:
3928 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3929 " cluster has changed",
3931 if new_ip != old_ip:
3932 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3933 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3934 " reachable on the network" %
3935 new_ip, errors.ECODE_NOTUNIQUE)
3937 self.op.name = new_name
3939 def Exec(self, feedback_fn):
3940 """Rename the cluster.
3943 clustername = self.op.name
3946 # shutdown the master IP
3947 master_params = self.cfg.GetMasterNetworkParameters()
3948 ems = self.cfg.GetUseExternalMipScript()
3949 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3951 result.Raise("Could not disable the master role")
3954 cluster = self.cfg.GetClusterInfo()
3955 cluster.cluster_name = clustername
3956 cluster.master_ip = new_ip
3957 self.cfg.Update(cluster, feedback_fn)
3959 # update the known hosts file
3960 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3961 node_list = self.cfg.GetOnlineNodeList()
3963 node_list.remove(master_params.name)
3966 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3968 master_params.ip = new_ip
3969 result = self.rpc.call_node_activate_master_ip(master_params.name,
3971 msg = result.fail_msg
3973 self.LogWarning("Could not re-enable the master role on"
3974 " the master, please restart manually: %s", msg)
3979 def _ValidateNetmask(cfg, netmask):
3980 """Checks if a netmask is valid.
3982 @type cfg: L{config.ConfigWriter}
3983 @param cfg: The cluster configuration
3985 @param netmask: the netmask to be verified
3986 @raise errors.OpPrereqError: if the validation fails
3989 ip_family = cfg.GetPrimaryIPFamily()
3991 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3992 except errors.ProgrammerError:
3993 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3994 ip_family, errors.ECODE_INVAL)
3995 if not ipcls.ValidateNetmask(netmask):
3996 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3997 (netmask), errors.ECODE_INVAL)
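# Minimal usage sketch, assuming an IPv4 cluster where the master netmask is
# given as a CIDR prefix length:
#   _ValidateNetmask(self.cfg, 24)   # accepted
#   _ValidateNetmask(self.cfg, 99)   # raises errors.OpPrereqError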
4000 class LUClusterSetParams(LogicalUnit):
4001 """Change the parameters of the cluster.
4004 HPATH = "cluster-modify"
4005 HTYPE = constants.HTYPE_CLUSTER
4008 def CheckArguments(self):
4012 if self.op.uid_pool:
4013 uidpool.CheckUidPool(self.op.uid_pool)
4015 if self.op.add_uids:
4016 uidpool.CheckUidPool(self.op.add_uids)
4018 if self.op.remove_uids:
4019 uidpool.CheckUidPool(self.op.remove_uids)
4021 if self.op.master_netmask is not None:
4022 _ValidateNetmask(self.cfg, self.op.master_netmask)
4024 if self.op.diskparams:
4025 for dt_params in self.op.diskparams.values():
4026 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4028 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4029 except errors.OpPrereqError, err:
4030 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
4033 def ExpandNames(self):
4034 # FIXME: in the future, modifying some cluster params may not require
4035 # checking (and locking) all nodes.
4036 self.needed_locks = {
4037 locking.LEVEL_NODE: locking.ALL_SET,
4038 locking.LEVEL_INSTANCE: locking.ALL_SET,
4039 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4041 self.share_locks = {
4042 locking.LEVEL_NODE: 1,
4043 locking.LEVEL_INSTANCE: 1,
4044 locking.LEVEL_NODEGROUP: 1,
4047 def BuildHooksEnv(self):
4052 "OP_TARGET": self.cfg.GetClusterName(),
4053 "NEW_VG_NAME": self.op.vg_name,
4056 def BuildHooksNodes(self):
4057 """Build hooks nodes.
4060 mn = self.cfg.GetMasterNode()
4063 def CheckPrereq(self):
4064 """Check prerequisites.
4066 This checks whether the given params don't conflict and
4067 if the given volume group is valid.
4070 if self.op.vg_name is not None and not self.op.vg_name:
4071 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4072 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4073 " instances exist", errors.ECODE_INVAL)
4075 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4076 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4077 raise errors.OpPrereqError("Cannot disable drbd helper while"
4078 " drbd-based instances exist",
4081 node_list = self.owned_locks(locking.LEVEL_NODE)
4083 # if vg_name is not None, check the given volume group on all nodes
4085 vglist = self.rpc.call_vg_list(node_list)
4086 for node in node_list:
4087 msg = vglist[node].fail_msg
4089 # ignoring down node
4090 self.LogWarning("Error while gathering data on node %s"
4091 " (ignoring node): %s", node, msg)
4093 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4095 constants.MIN_VG_SIZE)
4097 raise errors.OpPrereqError("Error on node '%s': %s" %
4098 (node, vgstatus), errors.ECODE_ENVIRON)
4100 if self.op.drbd_helper:
4101 # checks given drbd helper on all nodes
4102 helpers = self.rpc.call_drbd_helper(node_list)
4103 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4105 self.LogInfo("Not checking drbd helper on offline node %s", node)
4107 msg = helpers[node].fail_msg
4109 raise errors.OpPrereqError("Error checking drbd helper on node"
4110 " '%s': %s" % (node, msg),
4111 errors.ECODE_ENVIRON)
4112 node_helper = helpers[node].payload
4113 if node_helper != self.op.drbd_helper:
4114 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4115 (node, node_helper), errors.ECODE_ENVIRON)
4117 self.cluster = cluster = self.cfg.GetClusterInfo()
4118 # validate params changes
4119 if self.op.beparams:
4120 objects.UpgradeBeParams(self.op.beparams)
4121 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4122 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4124 if self.op.ndparams:
4125 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4126 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4128 # TODO: we need a more general way to handle resetting
4129 # cluster-level parameters to default values
4130 if self.new_ndparams["oob_program"] == "":
4131 self.new_ndparams["oob_program"] = \
4132 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4134 if self.op.hv_state:
4135 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4136 self.cluster.hv_state_static)
4137 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4138 for hv, values in new_hv_state.items())
4140 if self.op.disk_state:
4141 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4142 self.cluster.disk_state_static)
4143 self.new_disk_state = \
4144 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4145 for name, values in svalues.items()))
4146 for storage, svalues in new_disk_state.items())
4149 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4152 all_instances = self.cfg.GetAllInstancesInfo().values()
4154 for group in self.cfg.GetAllNodeGroupsInfo().values():
4155 instances = frozenset([inst for inst in all_instances
4156 if compat.any(node in group.members
4157 for node in inst.all_nodes)])
4158 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4159 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4160 new = _ComputeNewInstanceViolations(ipol,
4161 new_ipolicy, instances)
4163 violations.update(new)
4166 self.LogWarning("After the ipolicy change the following instances"
4167 " violate them: %s",
4168 utils.CommaJoin(utils.NiceSort(violations)))
4170 if self.op.nicparams:
4171 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4172 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4173 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4176 # check all instances for consistency
4177 for instance in self.cfg.GetAllInstancesInfo().values():
4178 for nic_idx, nic in enumerate(instance.nics):
4179 params_copy = copy.deepcopy(nic.nicparams)
4180 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4182 # check parameter syntax
4184 objects.NIC.CheckParameterSyntax(params_filled)
4185 except errors.ConfigurationError, err:
4186 nic_errors.append("Instance %s, nic/%d: %s" %
4187 (instance.name, nic_idx, err))
4189 # if we're moving instances to routed, check that they have an ip
4190 target_mode = params_filled[constants.NIC_MODE]
4191 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4192 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4193 " address" % (instance.name, nic_idx))
4195 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4196 "\n".join(nic_errors), errors.ECODE_INVAL)
4198 # hypervisor list/parameters
4199 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4200 if self.op.hvparams:
4201 for hv_name, hv_dict in self.op.hvparams.items():
4202 if hv_name not in self.new_hvparams:
4203 self.new_hvparams[hv_name] = hv_dict
4205 self.new_hvparams[hv_name].update(hv_dict)
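# Merge semantics sketch (hypothetical values): with cluster.hvparams
# containing {"xen-pvm": {"kernel_path": "/boot/vmlinuz"}} and op.hvparams
# containing {"xen-pvm": {"root_path": "/dev/xvda1"}}, new_hvparams ends up
# with both keys under "xen-pvm"; a hypervisor not yet known to the cluster
# simply receives the submitted dict unchanged.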
4207 # disk template parameters
4208 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4209 if self.op.diskparams:
4210 for dt_name, dt_params in self.op.diskparams.items():
4211 if dt_name not in self.new_diskparams:
4212 self.new_diskparams[dt_name] = dt_params
4214 self.new_diskparams[dt_name].update(dt_params)
4216 # os hypervisor parameters
4217 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4219 for os_name, hvs in self.op.os_hvp.items():
4220 if os_name not in self.new_os_hvp:
4221 self.new_os_hvp[os_name] = hvs
4223 for hv_name, hv_dict in hvs.items():
4224 if hv_name not in self.new_os_hvp[os_name]:
4225 self.new_os_hvp[os_name][hv_name] = hv_dict
4227 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4230 self.new_osp = objects.FillDict(cluster.osparams, {})
4231 if self.op.osparams:
4232 for os_name, osp in self.op.osparams.items():
4233 if os_name not in self.new_osp:
4234 self.new_osp[os_name] = {}
4236 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4239 if not self.new_osp[os_name]:
4240 # we removed all parameters
4241 del self.new_osp[os_name]
4243 # check the parameter validity (remote check)
4244 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4245 os_name, self.new_osp[os_name])
4247 # changes to the hypervisor list
4248 if self.op.enabled_hypervisors is not None:
4249 self.hv_list = self.op.enabled_hypervisors
4250 for hv in self.hv_list:
4251 # if the hypervisor doesn't already exist in the cluster
4252 # hvparams, we initialize it to empty, and then (in both
4253 # cases) we make sure to fill the defaults, as we might not
4254 # have a complete defaults list if the hypervisor wasn't enabled before
4256 if hv not in new_hvp:
4258 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4259 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4261 self.hv_list = cluster.enabled_hypervisors
4263 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4264 # either the enabled list has changed, or the parameters have, validate
4265 for hv_name, hv_params in self.new_hvparams.items():
4266 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4267 (self.op.enabled_hypervisors and
4268 hv_name in self.op.enabled_hypervisors)):
4269 # either this is a new hypervisor, or its parameters have changed
4270 hv_class = hypervisor.GetHypervisor(hv_name)
4271 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4272 hv_class.CheckParameterSyntax(hv_params)
4273 _CheckHVParams(self, node_list, hv_name, hv_params)
4276 # no need to check any newly-enabled hypervisors, since the
4277 # defaults have already been checked in the above code-block
4278 for os_name, os_hvp in self.new_os_hvp.items():
4279 for hv_name, hv_params in os_hvp.items():
4280 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4281 # we need to fill in the new os_hvp on top of the actual hv_p
4282 cluster_defaults = self.new_hvparams.get(hv_name, {})
4283 new_osp = objects.FillDict(cluster_defaults, hv_params)
4284 hv_class = hypervisor.GetHypervisor(hv_name)
4285 hv_class.CheckParameterSyntax(new_osp)
4286 _CheckHVParams(self, node_list, hv_name, new_osp)
4288 if self.op.default_iallocator:
4289 alloc_script = utils.FindFile(self.op.default_iallocator,
4290 constants.IALLOCATOR_SEARCH_PATH,
4292 if alloc_script is None:
4293 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4294 " specified" % self.op.default_iallocator,
4297 def Exec(self, feedback_fn):
4298 """Change the parameters of the cluster.
4301 if self.op.vg_name is not None:
4302 new_volume = self.op.vg_name
4305 if new_volume != self.cfg.GetVGName():
4306 self.cfg.SetVGName(new_volume)
4308 feedback_fn("Cluster LVM configuration already in desired"
4309 " state, not changing")
4310 if self.op.drbd_helper is not None:
4311 new_helper = self.op.drbd_helper
4314 if new_helper != self.cfg.GetDRBDHelper():
4315 self.cfg.SetDRBDHelper(new_helper)
4317 feedback_fn("Cluster DRBD helper already in desired state,"
4319 if self.op.hvparams:
4320 self.cluster.hvparams = self.new_hvparams
4322 self.cluster.os_hvp = self.new_os_hvp
4323 if self.op.enabled_hypervisors is not None:
4324 self.cluster.hvparams = self.new_hvparams
4325 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4326 if self.op.beparams:
4327 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4328 if self.op.nicparams:
4329 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4331 self.cluster.ipolicy = self.new_ipolicy
4332 if self.op.osparams:
4333 self.cluster.osparams = self.new_osp
4334 if self.op.ndparams:
4335 self.cluster.ndparams = self.new_ndparams
4336 if self.op.diskparams:
4337 self.cluster.diskparams = self.new_diskparams
4338 if self.op.hv_state:
4339 self.cluster.hv_state_static = self.new_hv_state
4340 if self.op.disk_state:
4341 self.cluster.disk_state_static = self.new_disk_state
4343 if self.op.candidate_pool_size is not None:
4344 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4345 # we need to update the pool size here, otherwise the save will fail
4346 _AdjustCandidatePool(self, [])
4348 if self.op.maintain_node_health is not None:
4349 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4350 feedback_fn("Note: CONFD was disabled at build time, node health"
4351 " maintenance is not useful (still enabling it)")
4352 self.cluster.maintain_node_health = self.op.maintain_node_health
4354 if self.op.prealloc_wipe_disks is not None:
4355 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4357 if self.op.add_uids is not None:
4358 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4360 if self.op.remove_uids is not None:
4361 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4363 if self.op.uid_pool is not None:
4364 self.cluster.uid_pool = self.op.uid_pool
4366 if self.op.default_iallocator is not None:
4367 self.cluster.default_iallocator = self.op.default_iallocator
4369 if self.op.reserved_lvs is not None:
4370 self.cluster.reserved_lvs = self.op.reserved_lvs
4372 if self.op.use_external_mip_script is not None:
4373 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4375 def helper_os(aname, mods, desc):
4377 lst = getattr(self.cluster, aname)
4378 for key, val in mods:
4379 if key == constants.DDM_ADD:
4381 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4384 elif key == constants.DDM_REMOVE:
4388 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4390 raise errors.ProgrammerError("Invalid modification '%s'" % key)
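# Hedged usage sketch (hypothetical OS name): a modification list such as
#   [(constants.DDM_ADD, "debian-image")]
# passed to helper_os("hidden_os", ..., "hidden") appends "debian-image" to
# cluster.hidden_os unless it is already listed, while constants.DDM_REMOVE
# drops it and merely warns if it is absent.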
4392 if self.op.hidden_os:
4393 helper_os("hidden_os", self.op.hidden_os, "hidden")
4395 if self.op.blacklisted_os:
4396 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4398 if self.op.master_netdev:
4399 master_params = self.cfg.GetMasterNetworkParameters()
4400 ems = self.cfg.GetUseExternalMipScript()
4401 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4402 self.cluster.master_netdev)
4403 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4405 result.Raise("Could not disable the master ip")
4406 feedback_fn("Changing master_netdev from %s to %s" %
4407 (master_params.netdev, self.op.master_netdev))
4408 self.cluster.master_netdev = self.op.master_netdev
4410 if self.op.master_netmask:
4411 master_params = self.cfg.GetMasterNetworkParameters()
4412 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4413 result = self.rpc.call_node_change_master_netmask(master_params.name,
4414 master_params.netmask,
4415 self.op.master_netmask,
4417 master_params.netdev)
4419 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4422 self.cluster.master_netmask = self.op.master_netmask
4424 self.cfg.Update(self.cluster, feedback_fn)
4426 if self.op.master_netdev:
4427 master_params = self.cfg.GetMasterNetworkParameters()
4428 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4429 self.op.master_netdev)
4430 ems = self.cfg.GetUseExternalMipScript()
4431 result = self.rpc.call_node_activate_master_ip(master_params.name,
4434 self.LogWarning("Could not re-enable the master ip on"
4435 " the master, please restart manually: %s",
4439 def _UploadHelper(lu, nodes, fname):
4440 """Helper for uploading a file and showing warnings.
4443 if os.path.exists(fname):
4444 result = lu.rpc.call_upload_file(nodes, fname)
4445 for to_node, to_result in result.items():
4446 msg = to_result.fail_msg
4448 msg = ("Copy of file %s to node %s failed: %s" %
4449 (fname, to_node, msg))
4450 lu.proc.LogWarning(msg)
4453 def _ComputeAncillaryFiles(cluster, redist):
4454 """Compute files external to Ganeti which need to be consistent.
4456 @type redist: boolean
4457 @param redist: Whether to include files which need to be redistributed
4460 # Compute files for all nodes
4462 pathutils.SSH_KNOWN_HOSTS_FILE,
4463 pathutils.CONFD_HMAC_KEY,
4464 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4465 pathutils.SPICE_CERT_FILE,
4466 pathutils.SPICE_CACERT_FILE,
4467 pathutils.RAPI_USERS_FILE,
4471 # we need to ship at least the RAPI certificate
4472 files_all.add(pathutils.RAPI_CERT_FILE)
4474 files_all.update(pathutils.ALL_CERT_FILES)
4475 files_all.update(ssconf.SimpleStore().GetFileList())
4477 if cluster.modify_etc_hosts:
4478 files_all.add(pathutils.ETC_HOSTS)
4480 if cluster.use_external_mip_script:
4481 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4483 # Files which are optional, these must:
4484 # - be present in one other category as well
4485 # - either exist or not exist on all nodes of that category (mc, vm all)
4487 pathutils.RAPI_USERS_FILE,
4490 # Files which should only be on master candidates
4494 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4498 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4499 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4500 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4502 # Files which should only be on VM-capable nodes
4505 for hv_name in cluster.enabled_hypervisors
4506 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4510 for hv_name in cluster.enabled_hypervisors
4511 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4513 # Filenames in each category must be unique
4514 all_files_set = files_all | files_mc | files_vm
4515 assert (len(all_files_set) ==
4516 sum(map(len, [files_all, files_mc, files_vm]))), \
4517 "Found file listed in more than one file list"
4519 # Optional files must be present in one other category
4520 assert all_files_set.issuperset(files_opt), \
4521 "Optional file not in a different required list"
4523 # This one file should never ever be re-distributed via RPC
4524 assert not (redist and
4525 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4527 return (files_all, files_opt, files_mc, files_vm)
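# The four sets returned here feed _RedistributeAncillaryFiles below:
# files_all are pushed to every node and files_vm only to VM-capable nodes;
# files_mc (master-candidate-only files) is expected to be empty when
# redistributing, and files_opt marks entries of the other sets whose
# absence is tolerated as long as it is consistent within the category.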
4530 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4531 """Distribute additional files which are part of the cluster configuration.
4533 ConfigWriter takes care of distributing the config and ssconf files, but
4534 there are more files which should be distributed to all nodes. This function
4535 makes sure those are copied.
4537 @param lu: calling logical unit
4538 @param additional_nodes: list of nodes not in the config to distribute to
4539 @type additional_vm: boolean
4540 @param additional_vm: whether the additional nodes are vm-capable or not
4543 # Gather target nodes
4544 cluster = lu.cfg.GetClusterInfo()
4545 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4547 online_nodes = lu.cfg.GetOnlineNodeList()
4548 online_set = frozenset(online_nodes)
4549 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4551 if additional_nodes is not None:
4552 online_nodes.extend(additional_nodes)
4554 vm_nodes.extend(additional_nodes)
4556 # Never distribute to master node
4557 for nodelist in [online_nodes, vm_nodes]:
4558 if master_info.name in nodelist:
4559 nodelist.remove(master_info.name)
4562 (files_all, _, files_mc, files_vm) = \
4563 _ComputeAncillaryFiles(cluster, True)
4565 # Never re-distribute configuration file from here
4566 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4567 pathutils.CLUSTER_CONF_FILE in files_vm)
4568 assert not files_mc, "Master candidates not handled in this function"
4571 (online_nodes, files_all),
4572 (vm_nodes, files_vm),
4576 for (node_list, files) in filemap:
4578 _UploadHelper(lu, node_list, fname)
4581 class LUClusterRedistConf(NoHooksLU):
4582 """Force the redistribution of cluster configuration.
4584 This is a very simple LU.
4589 def ExpandNames(self):
4590 self.needed_locks = {
4591 locking.LEVEL_NODE: locking.ALL_SET,
4593 self.share_locks[locking.LEVEL_NODE] = 1
4595 def Exec(self, feedback_fn):
4596 """Redistribute the configuration.
4599 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4600 _RedistributeAncillaryFiles(self)
4603 class LUClusterActivateMasterIp(NoHooksLU):
4604 """Activate the master IP on the master node.
4607 def Exec(self, feedback_fn):
4608 """Activate the master IP.
4611 master_params = self.cfg.GetMasterNetworkParameters()
4612 ems = self.cfg.GetUseExternalMipScript()
4613 result = self.rpc.call_node_activate_master_ip(master_params.name,
4615 result.Raise("Could not activate the master IP")
4618 class LUClusterDeactivateMasterIp(NoHooksLU):
4619 """Deactivate the master IP on the master node.
4622 def Exec(self, feedback_fn):
4623 """Deactivate the master IP.
4626 master_params = self.cfg.GetMasterNetworkParameters()
4627 ems = self.cfg.GetUseExternalMipScript()
4628 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4630 result.Raise("Could not deactivate the master IP")
4633 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4634 """Sleep and poll for an instance's disk to sync.
4637 if not instance.disks or disks is not None and not disks:
4640 disks = _ExpandCheckDisks(instance, disks)
4643 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4645 node = instance.primary_node
4648 lu.cfg.SetDiskID(dev, node)
4650 # TODO: Convert to utils.Retry
4653 degr_retries = 10 # in seconds, as we sleep 1 second each time
4657 cumul_degraded = False
4658 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4659 msg = rstats.fail_msg
4661 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4664 raise errors.RemoteError("Can't contact node %s for mirror data,"
4665 " aborting." % node)
4668 rstats = rstats.payload
4670 for i, mstat in enumerate(rstats):
4672 lu.LogWarning("Can't compute data for node %s/%s",
4673 node, disks[i].iv_name)
4676 cumul_degraded = (cumul_degraded or
4677 (mstat.is_degraded and mstat.sync_percent is None))
4678 if mstat.sync_percent is not None:
4680 if mstat.estimated_time is not None:
4681 rem_time = ("%s remaining (estimated)" %
4682 utils.FormatSeconds(mstat.estimated_time))
4683 max_time = mstat.estimated_time
4685 rem_time = "no time estimate"
4686 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4687 (disks[i].iv_name, mstat.sync_percent, rem_time))
4689 # if we're done but degraded, let's do a few small retries, to
4690 # make sure we see a stable and not transient situation; therefore
4691 # we force restart of the loop
4692 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4693 logging.info("Degraded disks found, %d retries left", degr_retries)
4701 time.sleep(min(60, max_time))
4704 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4705 return not cumul_degraded
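# Summary of the loop above: each iteration polls call_blockdev_getmirrorstatus,
# prints per-device progress, and once syncing looks finished performs a few
# short extra rounds (degr_retries) to rule out transient degradation; the
# return value is True only if no device is left degraded.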
4708 def _BlockdevFind(lu, node, dev, instance):
4709 """Wrapper around call_blockdev_find to annotate diskparams.
4711 @param lu: A reference to the lu object
4712 @param node: The node to call out
4713 @param dev: The device to find
4714 @param instance: The instance object the device belongs to
4715 @return: The result of the rpc call
4718 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4719 return lu.rpc.call_blockdev_find(node, disk)
4722 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4723 """Wrapper around L{_CheckDiskConsistencyInner}.
4726 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4727 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4731 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4733 """Check that mirrors are not degraded.
4735 @attention: The device has to be annotated already.
4737 The ldisk parameter, if True, will change the test from the
4738 is_degraded attribute (which represents overall non-ok status for
4739 the device(s)) to the ldisk (representing the local storage status).
4742 lu.cfg.SetDiskID(dev, node)
4746 if on_primary or dev.AssembleOnSecondary():
4747 rstats = lu.rpc.call_blockdev_find(node, dev)
4748 msg = rstats.fail_msg
4750 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4752 elif not rstats.payload:
4753 lu.LogWarning("Can't find disk on node %s", node)
4757 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4759 result = result and not rstats.payload.is_degraded
4762 for child in dev.children:
4763 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4769 class LUOobCommand(NoHooksLU):
4770 """Logical unit for OOB handling.
4774 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4776 def ExpandNames(self):
4777 """Gather locks we need.
4780 if self.op.node_names:
4781 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4782 lock_names = self.op.node_names
4784 lock_names = locking.ALL_SET
4786 self.needed_locks = {
4787 locking.LEVEL_NODE: lock_names,
4790 def CheckPrereq(self):
4791 """Check prerequisites.
4794 - the node exists in the configuration
4797 Any errors are signaled by raising errors.OpPrereqError.
4801 self.master_node = self.cfg.GetMasterNode()
4803 assert self.op.power_delay >= 0.0
4805 if self.op.node_names:
4806 if (self.op.command in self._SKIP_MASTER and
4807 self.master_node in self.op.node_names):
4808 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4809 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4811 if master_oob_handler:
4812 additional_text = ("run '%s %s %s' if you want to operate on the"
4813 " master regardless") % (master_oob_handler,
4817 additional_text = "it does not support out-of-band operations"
4819 raise errors.OpPrereqError(("Operating on the master node %s is not"
4820 " allowed for %s; %s") %
4821 (self.master_node, self.op.command,
4822 additional_text), errors.ECODE_INVAL)
4824 self.op.node_names = self.cfg.GetNodeList()
4825 if self.op.command in self._SKIP_MASTER:
4826 self.op.node_names.remove(self.master_node)
4828 if self.op.command in self._SKIP_MASTER:
4829 assert self.master_node not in self.op.node_names
4831 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4833 raise errors.OpPrereqError("Node %s not found" % node_name,
4836 self.nodes.append(node)
4838 if (not self.op.ignore_status and
4839 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4840 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4841 " not marked offline") % node_name,
4844 def Exec(self, feedback_fn):
4845 """Execute OOB and return result if we expect any.
4848 master_node = self.master_node
4851 for idx, node in enumerate(utils.NiceSort(self.nodes,
4852 key=lambda node: node.name)):
4853 node_entry = [(constants.RS_NORMAL, node.name)]
4854 ret.append(node_entry)
4856 oob_program = _SupportsOob(self.cfg, node)
4859 node_entry.append((constants.RS_UNAVAIL, None))
4862 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4863 self.op.command, oob_program, node.name)
4864 result = self.rpc.call_run_oob(master_node, oob_program,
4865 self.op.command, node.name,
4869 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4870 node.name, result.fail_msg)
4871 node_entry.append((constants.RS_NODATA, None))
4874 self._CheckPayload(result)
4875 except errors.OpExecError, err:
4876 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4878 node_entry.append((constants.RS_NODATA, None))
4880 if self.op.command == constants.OOB_HEALTH:
4881 # For health we should log important events
4882 for item, status in result.payload:
4883 if status in [constants.OOB_STATUS_WARNING,
4884 constants.OOB_STATUS_CRITICAL]:
4885 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4886 item, node.name, status)
4888 if self.op.command == constants.OOB_POWER_ON:
4890 elif self.op.command == constants.OOB_POWER_OFF:
4891 node.powered = False
4892 elif self.op.command == constants.OOB_POWER_STATUS:
4893 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4894 if powered != node.powered:
4895 logging.warning(("Recorded power state (%s) of node '%s' does not"
4896 " match actual power state (%s)"), node.powered,
4899 # For configuration changing commands we should update the node
4900 if self.op.command in (constants.OOB_POWER_ON,
4901 constants.OOB_POWER_OFF):
4902 self.cfg.Update(node, feedback_fn)
4904 node_entry.append((constants.RS_NORMAL, result.payload))
4906 if (self.op.command == constants.OOB_POWER_ON and
4907 idx < len(self.nodes) - 1):
4908 time.sleep(self.op.power_delay)
4912 def _CheckPayload(self, result):
4913 """Checks if the payload is valid.
4915 @param result: RPC result
4916 @raises errors.OpExecError: If payload is not valid
4920 if self.op.command == constants.OOB_HEALTH:
4921 if not isinstance(result.payload, list):
4922 errs.append("command 'health' is expected to return a list but got %s" %
4923 type(result.payload))
4925 for item, status in result.payload:
4926 if status not in constants.OOB_STATUSES:
4927 errs.append("health item '%s' has invalid status '%s'" %
4930 if self.op.command == constants.OOB_POWER_STATUS:
4931 if not isinstance(result.payload, dict):
4932 errs.append("power-status is expected to return a dict but got %s" %
4933 type(result.payload))
4935 if self.op.command in [
4936 constants.OOB_POWER_ON,
4937 constants.OOB_POWER_OFF,
4938 constants.OOB_POWER_CYCLE,
4940 if result.payload is not None:
4941 errs.append("%s is expected to not return payload but got '%s'" %
4942 (self.op.command, result.payload))
4945 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4946 utils.CommaJoin(errs))
4949 class _OsQuery(_QueryBase):
4950 FIELDS = query.OS_FIELDS
4952 def ExpandNames(self, lu):
4953 # Lock all nodes in shared mode
4954 # Temporary removal of locks, should be reverted later
4955 # TODO: reintroduce locks when they are lighter-weight
4956 lu.needed_locks = {}
4957 #self.share_locks[locking.LEVEL_NODE] = 1
4958 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4960 # The following variables interact with _QueryBase._GetNames
4962 self.wanted = self.names
4964 self.wanted = locking.ALL_SET
4966 self.do_locking = self.use_locking
4968 def DeclareLocks(self, lu, level):
4972 def _DiagnoseByOS(rlist):
4973 """Remaps a per-node return list into an a per-os per-node dictionary
4975 @param rlist: a map with node names as keys and OS objects as values
4978 @return: a dictionary with osnames as keys and as value another
4979 map, with nodes as keys and tuples of (path, status, diagnose,
4980 variants, parameters, api_versions) as values, eg::
4982 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4983 (/srv/..., False, "invalid api")],
4984 "node2": [(/srv/..., True, "", [], [])]}
4989 # we build here the list of nodes that didn't fail the RPC (at RPC
4990 # level), so that nodes with a non-responding node daemon don't
4991 # make all OSes invalid
4992 good_nodes = [node_name for node_name in rlist
4993 if not rlist[node_name].fail_msg]
4994 for node_name, nr in rlist.items():
4995 if nr.fail_msg or not nr.payload:
4997 for (name, path, status, diagnose, variants,
4998 params, api_versions) in nr.payload:
4999 if name not in all_os:
5000 # build a list of nodes for this os containing empty lists
5001 # for each node in node_list
5003 for nname in good_nodes:
5004 all_os[name][nname] = []
5005 # convert params from [name, help] to (name, help)
5006 params = [tuple(v) for v in params]
5007 all_os[name][node_name].append((path, status, diagnose,
5008 variants, params, api_versions))
5011 def _GetQueryData(self, lu):
5012 """Computes the list of nodes and their attributes.
5015 # Locking is not used
5016 assert not (compat.any(lu.glm.is_owned(level)
5017 for level in locking.LEVELS
5018 if level != locking.LEVEL_CLUSTER) or
5019 self.do_locking or self.use_locking)
5021 valid_nodes = [node.name
5022 for node in lu.cfg.GetAllNodesInfo().values()
5023 if not node.offline and node.vm_capable]
5024 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5025 cluster = lu.cfg.GetClusterInfo()
5029 for (os_name, os_data) in pol.items():
5030 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5031 hidden=(os_name in cluster.hidden_os),
5032 blacklisted=(os_name in cluster.blacklisted_os))
5036 api_versions = set()
5038 for idx, osl in enumerate(os_data.values()):
5039 info.valid = bool(info.valid and osl and osl[0][1])
5043 (node_variants, node_params, node_api) = osl[0][3:6]
5046 variants.update(node_variants)
5047 parameters.update(node_params)
5048 api_versions.update(node_api)
5050 # Filter out inconsistent values
5051 variants.intersection_update(node_variants)
5052 parameters.intersection_update(node_params)
5053 api_versions.intersection_update(node_api)
5055 info.variants = list(variants)
5056 info.parameters = list(parameters)
5057 info.api_versions = list(api_versions)
5059 data[os_name] = info
5061 # Prepare data in requested order
5062 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5066 class LUOsDiagnose(NoHooksLU):
5067 """Logical unit for OS diagnose/query.
5073 def _BuildFilter(fields, names):
5074 """Builds a filter for querying OSes.
5077 name_filter = qlang.MakeSimpleFilter("name", names)
5079 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5080 # respective field is not requested
5081 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5082 for fname in ["hidden", "blacklisted"]
5083 if fname not in fields]
5084 if "valid" not in fields:
5085 status_filter.append([qlang.OP_TRUE, "valid"])
5088 status_filter.insert(0, qlang.OP_AND)
5090 status_filter = None
5092 if name_filter and status_filter:
5093 return [qlang.OP_AND, name_filter, status_filter]
5097 return status_filter
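# Sketch of the default result (no names, default output fields): the filter
# built above is roughly
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
# i.e. hidden, blacklisted and invalid OSes are filtered out unless the caller
# explicitly asks for those fields.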
5099 def CheckArguments(self):
5100 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5101 self.op.output_fields, False)
5103 def ExpandNames(self):
5104 self.oq.ExpandNames(self)
5106 def Exec(self, feedback_fn):
5107 return self.oq.OldStyleQuery(self)
5110 class LUNodeRemove(LogicalUnit):
5111 """Logical unit for removing a node.
5114 HPATH = "node-remove"
5115 HTYPE = constants.HTYPE_NODE
5117 def BuildHooksEnv(self):
5122 "OP_TARGET": self.op.node_name,
5123 "NODE_NAME": self.op.node_name,
5126 def BuildHooksNodes(self):
5127 """Build hooks nodes.
5129 This doesn't run on the target node in the pre phase as a failed
5130 node would then be impossible to remove.
5133 all_nodes = self.cfg.GetNodeList()
5135 all_nodes.remove(self.op.node_name)
5138 return (all_nodes, all_nodes)
5140 def CheckPrereq(self):
5141 """Check prerequisites.
5144 - the node exists in the configuration
5145 - it does not have primary or secondary instances
5146 - it's not the master
5148 Any errors are signaled by raising errors.OpPrereqError.
5151 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5152 node = self.cfg.GetNodeInfo(self.op.node_name)
5153 assert node is not None
5155 masternode = self.cfg.GetMasterNode()
5156 if node.name == masternode:
5157 raise errors.OpPrereqError("Node is the master node, failover to another"
5158 " node is required", errors.ECODE_INVAL)
5160 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5161 if node.name in instance.all_nodes:
5162 raise errors.OpPrereqError("Instance %s is still running on the node,"
5163 " please remove first" % instance_name,
5165 self.op.node_name = node.name
5168 def Exec(self, feedback_fn):
5169 """Removes the node from the cluster.
5173 logging.info("Stopping the node daemon and removing configs from node %s",
5176 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5178 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5181 # Promote nodes to master candidate as needed
5182 _AdjustCandidatePool(self, exceptions=[node.name])
5183 self.context.RemoveNode(node.name)
5185 # Run post hooks on the node before it's removed
5186 _RunPostHook(self, node.name)
5188 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5189 msg = result.fail_msg
5191 self.LogWarning("Errors encountered on the remote node while leaving"
5192 " the cluster: %s", msg)
5194 # Remove node from our /etc/hosts
5195 if self.cfg.GetClusterInfo().modify_etc_hosts:
5196 master_node = self.cfg.GetMasterNode()
5197 result = self.rpc.call_etc_hosts_modify(master_node,
5198 constants.ETC_HOSTS_REMOVE,
5200 result.Raise("Can't update hosts file with new host data")
5201 _RedistributeAncillaryFiles(self)
5204 class _NodeQuery(_QueryBase):
5205 FIELDS = query.NODE_FIELDS
5207 def ExpandNames(self, lu):
5208 lu.needed_locks = {}
5209 lu.share_locks = _ShareAll()
5212 self.wanted = _GetWantedNodes(lu, self.names)
5214 self.wanted = locking.ALL_SET
5216 self.do_locking = (self.use_locking and
5217 query.NQ_LIVE in self.requested_data)
5220 # If any non-static field is requested we need to lock the nodes
5221 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5223 def DeclareLocks(self, lu, level):
5226 def _GetQueryData(self, lu):
5227 """Computes the list of nodes and their attributes.
5230 all_info = lu.cfg.GetAllNodesInfo()
5232 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5234 # Gather data as requested
5235 if query.NQ_LIVE in self.requested_data:
5236 # filter out non-vm_capable nodes
5237 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5239 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5240 [lu.cfg.GetHypervisorType()])
5241 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5242 for (name, nresult) in node_data.items()
5243 if not nresult.fail_msg and nresult.payload)
5247 if query.NQ_INST in self.requested_data:
5248 node_to_primary = dict([(name, set()) for name in nodenames])
5249 node_to_secondary = dict([(name, set()) for name in nodenames])
5251 inst_data = lu.cfg.GetAllInstancesInfo()
5253 for inst in inst_data.values():
5254 if inst.primary_node in node_to_primary:
5255 node_to_primary[inst.primary_node].add(inst.name)
5256 for secnode in inst.secondary_nodes:
5257 if secnode in node_to_secondary:
5258 node_to_secondary[secnode].add(inst.name)
5260 node_to_primary = None
5261 node_to_secondary = None
5263 if query.NQ_OOB in self.requested_data:
5264 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5265 for name, node in all_info.iteritems())
5269 if query.NQ_GROUP in self.requested_data:
5270 groups = lu.cfg.GetAllNodeGroupsInfo()
5274 return query.NodeQueryData([all_info[name] for name in nodenames],
5275 live_data, lu.cfg.GetMasterNode(),
5276 node_to_primary, node_to_secondary, groups,
5277 oob_support, lu.cfg.GetClusterInfo())
5280 class LUNodeQuery(NoHooksLU):
5281 """Logical unit for querying nodes.
5284 # pylint: disable=W0142
5287 def CheckArguments(self):
5288 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5289 self.op.output_fields, self.op.use_locking)
5291 def ExpandNames(self):
5292 self.nq.ExpandNames(self)
5294 def DeclareLocks(self, level):
5295 self.nq.DeclareLocks(self, level)
5297 def Exec(self, feedback_fn):
5298 return self.nq.OldStyleQuery(self)
5301 class LUNodeQueryvols(NoHooksLU):
5302 """Logical unit for getting volumes on node(s).
5306 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5307 _FIELDS_STATIC = utils.FieldSet("node")
5309 def CheckArguments(self):
5310 _CheckOutputFields(static=self._FIELDS_STATIC,
5311 dynamic=self._FIELDS_DYNAMIC,
5312 selected=self.op.output_fields)
5314 def ExpandNames(self):
5315 self.share_locks = _ShareAll()
5316 self.needed_locks = {}
5318 if not self.op.nodes:
5319 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5321 self.needed_locks[locking.LEVEL_NODE] = \
5322 _GetWantedNodes(self, self.op.nodes)
5324 def Exec(self, feedback_fn):
5325 """Computes the list of nodes and their attributes.
5328 nodenames = self.owned_locks(locking.LEVEL_NODE)
5329 volumes = self.rpc.call_node_volumes(nodenames)
5331 ilist = self.cfg.GetAllInstancesInfo()
5332 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5335 for node in nodenames:
5336 nresult = volumes[node]
5339 msg = nresult.fail_msg
5341 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5344 node_vols = sorted(nresult.payload,
5345 key=operator.itemgetter("dev"))
5347 for vol in node_vols:
5349 for field in self.op.output_fields:
5352 elif field == "phys":
5356 elif field == "name":
5358 elif field == "size":
5359 val = int(float(vol["size"]))
5360 elif field == "instance":
5361 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5363 raise errors.ParameterError(field)
5364 node_output.append(str(val))
5366 output.append(node_output)
5371 class LUNodeQueryStorage(NoHooksLU):
5372 """Logical unit for getting information on storage units on node(s).
5375 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5378 def CheckArguments(self):
5379 _CheckOutputFields(static=self._FIELDS_STATIC,
5380 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5381 selected=self.op.output_fields)
5383 def ExpandNames(self):
5384 self.share_locks = _ShareAll()
5385 self.needed_locks = {}
5388 self.needed_locks[locking.LEVEL_NODE] = \
5389 _GetWantedNodes(self, self.op.nodes)
5391 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5393 def Exec(self, feedback_fn):
5394 """Computes the list of nodes and their attributes.
5397 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5399 # Always get name to sort by
5400 if constants.SF_NAME in self.op.output_fields:
5401 fields = self.op.output_fields[:]
5403 fields = [constants.SF_NAME] + self.op.output_fields
5405 # Never ask for node or type as it's only known to the LU
5406 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5407 while extra in fields:
5408 fields.remove(extra)
5410 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5411 name_idx = field_idx[constants.SF_NAME]
5413 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5414 data = self.rpc.call_storage_list(self.nodes,
5415 self.op.storage_type, st_args,
5416 self.op.name, fields)
5420 for node in utils.NiceSort(self.nodes):
5421 nresult = data[node]
5425 msg = nresult.fail_msg
5427 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5430 rows = dict([(row[name_idx], row) for row in nresult.payload])
5432 for name in utils.NiceSort(rows.keys()):
5437 for field in self.op.output_fields:
5438 if field == constants.SF_NODE:
5440 elif field == constants.SF_TYPE:
5441 val = self.op.storage_type
5442 elif field in field_idx:
5443 val = row[field_idx[field]]
5445 raise errors.ParameterError(field)
5454 class _InstanceQuery(_QueryBase):
5455 FIELDS = query.INSTANCE_FIELDS
5457 def ExpandNames(self, lu):
5458 lu.needed_locks = {}
5459 lu.share_locks = _ShareAll()
5462 self.wanted = _GetWantedInstances(lu, self.names)
5464 self.wanted = locking.ALL_SET
5466 self.do_locking = (self.use_locking and
5467 query.IQ_LIVE in self.requested_data)
5469 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5470 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5471 lu.needed_locks[locking.LEVEL_NODE] = []
5472 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5474 self.do_grouplocks = (self.do_locking and
5475 query.IQ_NODES in self.requested_data)
5477 def DeclareLocks(self, lu, level):
5479 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5480 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5482 # Lock all groups used by instances optimistically; this requires going
5483 # via the node before it's locked, requiring verification later on
5484 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5486 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5487 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5488 elif level == locking.LEVEL_NODE:
5489 lu._LockInstancesNodes() # pylint: disable=W0212
5492 def _CheckGroupLocks(lu):
5493 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5494 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5496 # Check if node groups for locked instances are still correct
5497 for instance_name in owned_instances:
5498 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5500 def _GetQueryData(self, lu):
5501 """Computes the list of instances and their attributes.
5504 if self.do_grouplocks:
5505 self._CheckGroupLocks(lu)
5507 cluster = lu.cfg.GetClusterInfo()
5508 all_info = lu.cfg.GetAllInstancesInfo()
5510 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5512 instance_list = [all_info[name] for name in instance_names]
5513 nodes = frozenset(itertools.chain(*(inst.all_nodes
5514 for inst in instance_list)))
5515 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5518 wrongnode_inst = set()
5520 # Gather data as requested
5521 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5523 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5525 result = node_data[name]
5527 # offline nodes will be in both lists
5528 assert result.fail_msg
5529 offline_nodes.append(name)
5531 bad_nodes.append(name)
5532 elif result.payload:
5533 for inst in result.payload:
5534 if inst in all_info:
5535 if all_info[inst].primary_node == name:
5536 live_data.update(result.payload)
5538 wrongnode_inst.add(inst)
5540 # orphan instance; we don't list it here as we don't
5541 # handle this case yet in the output of instance listing
5542 logging.warning("Orphan instance '%s' found on node %s",
5544 # else no instance is alive
5548 if query.IQ_DISKUSAGE in self.requested_data:
5549 gmi = ganeti.masterd.instance
5550 disk_usage = dict((inst.name,
5551 gmi.ComputeDiskSize(inst.disk_template,
5552 [{constants.IDISK_SIZE: disk.size}
5553 for disk in inst.disks]))
5554 for inst in instance_list)
5558 if query.IQ_CONSOLE in self.requested_data:
5560 for inst in instance_list:
5561 if inst.name in live_data:
5562 # Instance is running
5563 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5565 consinfo[inst.name] = None
5566 assert set(consinfo.keys()) == set(instance_names)
5570 if query.IQ_NODES in self.requested_data:
5571 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5573 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5574 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5575 for uuid in set(map(operator.attrgetter("group"),
5581 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5582 disk_usage, offline_nodes, bad_nodes,
5583 live_data, wrongnode_inst, consinfo,
5587 class LUQuery(NoHooksLU):
5588 """Query for resources/items of a certain kind.
5591 # pylint: disable=W0142
5594 def CheckArguments(self):
5595 qcls = _GetQueryImplementation(self.op.what)
5597 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5599 def ExpandNames(self):
5600 self.impl.ExpandNames(self)
5602 def DeclareLocks(self, level):
5603 self.impl.DeclareLocks(self, level)
5605 def Exec(self, feedback_fn):
5606 return self.impl.NewStyleQuery(self)
5609 class LUQueryFields(NoHooksLU):
5610 """Query for resources/items of a certain kind.
5613 # pylint: disable=W0142
5616 def CheckArguments(self):
5617 self.qcls = _GetQueryImplementation(self.op.what)
5619 def ExpandNames(self):
5620 self.needed_locks = {}
5622 def Exec(self, feedback_fn):
5623 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5626 class LUNodeModifyStorage(NoHooksLU):
5627 """Logical unit for modifying a storage volume on a node.
5632 def CheckArguments(self):
5633 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5635 storage_type = self.op.storage_type
5638 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5640 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5641 " modified" % storage_type,
5644 diff = set(self.op.changes.keys()) - modifiable
5646 raise errors.OpPrereqError("The following fields can not be modified for"
5647 " storage units of type '%s': %r" %
5648 (storage_type, list(diff)),
5651 def ExpandNames(self):
5652 self.needed_locks = {
5653 locking.LEVEL_NODE: self.op.node_name,
5656 def Exec(self, feedback_fn):
5657 """Computes the list of nodes and their attributes.
5660 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5661 result = self.rpc.call_storage_modify(self.op.node_name,
5662 self.op.storage_type, st_args,
5663 self.op.name, self.op.changes)
5664 result.Raise("Failed to modify storage unit '%s' on %s" %
5665 (self.op.name, self.op.node_name))
5668 class LUNodeAdd(LogicalUnit):
5669 """Logical unit for adding node to the cluster.
5673 HTYPE = constants.HTYPE_NODE
5674 _NFLAGS = ["master_capable", "vm_capable"]
5676 def CheckArguments(self):
5677 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5678 # validate/normalize the node name
5679 self.hostname = netutils.GetHostname(name=self.op.node_name,
5680 family=self.primary_ip_family)
5681 self.op.node_name = self.hostname.name
5683 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5684 raise errors.OpPrereqError("Cannot readd the master node",
5687 if self.op.readd and self.op.group:
5688 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5689 " being readded", errors.ECODE_INVAL)
5691 def BuildHooksEnv(self):
5694 This will run on all nodes before, and on all nodes + the new node after.
5698 "OP_TARGET": self.op.node_name,
5699 "NODE_NAME": self.op.node_name,
5700 "NODE_PIP": self.op.primary_ip,
5701 "NODE_SIP": self.op.secondary_ip,
5702 "MASTER_CAPABLE": str(self.op.master_capable),
5703 "VM_CAPABLE": str(self.op.vm_capable),
5706 def BuildHooksNodes(self):
5707 """Build hooks nodes.
5710 # Exclude added node
5711 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5712 post_nodes = pre_nodes + [self.op.node_name, ]
5714 return (pre_nodes, post_nodes)
5716 def CheckPrereq(self):
5717 """Check prerequisites.
5720 - the new node is not already in the config
5722     - its parameters (single/dual homed) match the cluster
5724 Any errors are signaled by raising errors.OpPrereqError.
5728 hostname = self.hostname
5729 node = hostname.name
5730 primary_ip = self.op.primary_ip = hostname.ip
5731 if self.op.secondary_ip is None:
5732 if self.primary_ip_family == netutils.IP6Address.family:
5733         raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5734 " IPv4 address must be given as secondary",
5736 self.op.secondary_ip = primary_ip
5738 secondary_ip = self.op.secondary_ip
5739 if not netutils.IP4Address.IsValid(secondary_ip):
5740 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5741 " address" % secondary_ip, errors.ECODE_INVAL)
5743 node_list = cfg.GetNodeList()
5744 if not self.op.readd and node in node_list:
5745 raise errors.OpPrereqError("Node %s is already in the configuration" %
5746 node, errors.ECODE_EXISTS)
5747 elif self.op.readd and node not in node_list:
5748 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5751 self.changed_primary_ip = False
5753 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5754 if self.op.readd and node == existing_node_name:
5755 if existing_node.secondary_ip != secondary_ip:
5756 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5757 " address configuration as before",
5759 if existing_node.primary_ip != primary_ip:
5760 self.changed_primary_ip = True
5764 if (existing_node.primary_ip == primary_ip or
5765 existing_node.secondary_ip == primary_ip or
5766 existing_node.primary_ip == secondary_ip or
5767 existing_node.secondary_ip == secondary_ip):
5768 raise errors.OpPrereqError("New node ip address(es) conflict with"
5769 " existing node %s" % existing_node.name,
5770 errors.ECODE_NOTUNIQUE)
5772 # After this 'if' block, None is no longer a valid value for the
5773 # _capable op attributes
5775 old_node = self.cfg.GetNodeInfo(node)
5776 assert old_node is not None, "Can't retrieve locked node %s" % node
5777 for attr in self._NFLAGS:
5778 if getattr(self.op, attr) is None:
5779 setattr(self.op, attr, getattr(old_node, attr))
5781 for attr in self._NFLAGS:
5782 if getattr(self.op, attr) is None:
5783 setattr(self.op, attr, True)
5785 if self.op.readd and not self.op.vm_capable:
5786 pri, sec = cfg.GetNodeInstances(node)
5788 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5789 " flag set to false, but it already holds"
5790 " instances" % node,
5793 # check that the type of the node (single versus dual homed) is the
5794 # same as for the master
5795 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5796 master_singlehomed = myself.secondary_ip == myself.primary_ip
5797 newbie_singlehomed = secondary_ip == primary_ip
5798 if master_singlehomed != newbie_singlehomed:
5799 if master_singlehomed:
5800 raise errors.OpPrereqError("The master has no secondary ip but the"
5801 " new node has one",
5804 raise errors.OpPrereqError("The master has a secondary ip but the"
5805 " new node doesn't have one",
5808 # checks reachability
5809 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5810 raise errors.OpPrereqError("Node not reachable by ping",
5811 errors.ECODE_ENVIRON)
5813 if not newbie_singlehomed:
5814 # check reachability from my secondary ip to newbie's secondary ip
5815 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5816 source=myself.secondary_ip):
5817 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5818 " based ping to node daemon port",
5819 errors.ECODE_ENVIRON)
5826 if self.op.master_capable:
5827 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5829 self.master_candidate = False
5832 self.new_node = old_node
5834 node_group = cfg.LookupNodeGroup(self.op.group)
5835 self.new_node = objects.Node(name=node,
5836 primary_ip=primary_ip,
5837 secondary_ip=secondary_ip,
5838 master_candidate=self.master_candidate,
5839 offline=False, drained=False,
5842 if self.op.ndparams:
5843 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5845 if self.op.hv_state:
5846 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5848 if self.op.disk_state:
5849 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5851 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5852 # it a property on the base class.
5853 result = rpc.DnsOnlyRunner().call_version([node])[node]
5854 result.Raise("Can't get version information from node %s" % node)
5855 if constants.PROTOCOL_VERSION == result.payload:
5856       logging.info("Communication to node %s fine, sw version %s matches",
5857 node, result.payload)
5859       raise errors.OpPrereqError("Version mismatch: master version %s,"
5860 " node version %s" %
5861 (constants.PROTOCOL_VERSION, result.payload),
5862 errors.ECODE_ENVIRON)
5864 def Exec(self, feedback_fn):
5865 """Adds the new node to the cluster.
5868 new_node = self.new_node
5869 node = new_node.name
5871 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5874     # We are adding a new node, so we assume it's powered
5875 new_node.powered = True
5877 # for re-adds, reset the offline/drained/master-candidate flags;
5878 # we need to reset here, otherwise offline would prevent RPC calls
5879 # later in the procedure; this also means that if the re-add
5880 # fails, we are left with a non-offlined, broken node
5882 new_node.drained = new_node.offline = False # pylint: disable=W0201
5883 self.LogInfo("Readding a node, the offline/drained flags were reset")
5884 # if we demote the node, we do cleanup later in the procedure
5885 new_node.master_candidate = self.master_candidate
5886 if self.changed_primary_ip:
5887 new_node.primary_ip = self.op.primary_ip
5889 # copy the master/vm_capable flags
5890 for attr in self._NFLAGS:
5891 setattr(new_node, attr, getattr(self.op, attr))
5893 # notify the user about any possible mc promotion
5894 if new_node.master_candidate:
5895 self.LogInfo("Node will be a master candidate")
5897 if self.op.ndparams:
5898 new_node.ndparams = self.op.ndparams
5900 new_node.ndparams = {}
5902 if self.op.hv_state:
5903 new_node.hv_state_static = self.new_hv_state
5905 if self.op.disk_state:
5906 new_node.disk_state_static = self.new_disk_state
5908 # Add node to our /etc/hosts, and add key to known_hosts
5909 if self.cfg.GetClusterInfo().modify_etc_hosts:
5910 master_node = self.cfg.GetMasterNode()
5911 result = self.rpc.call_etc_hosts_modify(master_node,
5912 constants.ETC_HOSTS_ADD,
5915 result.Raise("Can't update hosts file with new host data")
5917 if new_node.secondary_ip != new_node.primary_ip:
5918 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5921 node_verify_list = [self.cfg.GetMasterNode()]
5922 node_verify_param = {
5923 constants.NV_NODELIST: ([node], {}),
5924 # TODO: do a node-net-test as well?
5927 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5928 self.cfg.GetClusterName())
5929 for verifier in node_verify_list:
5930 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5931 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5933 for failed in nl_payload:
5934 feedback_fn("ssh/hostname verification failed"
5935 " (checking from %s): %s" %
5936 (verifier, nl_payload[failed]))
5937 raise errors.OpExecError("ssh/hostname verification failed")
5940 _RedistributeAncillaryFiles(self)
5941 self.context.ReaddNode(new_node)
5942 # make sure we redistribute the config
5943 self.cfg.Update(new_node, feedback_fn)
5944 # and make sure the new node will not have old files around
5945 if not new_node.master_candidate:
5946 result = self.rpc.call_node_demote_from_mc(new_node.name)
5947 msg = result.fail_msg
5949 self.LogWarning("Node failed to demote itself from master"
5950 " candidate status: %s" % msg)
5952 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5953 additional_vm=self.op.vm_capable)
5954 self.context.AddNode(new_node, self.proc.GetECId())
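# Note on the two paths above: a re-add refreshes the existing node object
# (ReaddNode plus a config update, demoting it from master candidate if
# needed), while a fresh add distributes the ancillary files to the new node
# and registers it in the configuration via AddNode.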
5957 class LUNodeSetParams(LogicalUnit):
5958 """Modifies the parameters of a node.
5960 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5961 to the node role (as _ROLE_*)
5962 @cvar _R2F: a dictionary from node role to tuples of flags
5963 @cvar _FLAGS: a list of attribute names corresponding to the flags
5966 HPATH = "node-modify"
5967 HTYPE = constants.HTYPE_NODE
5969 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5971 (True, False, False): _ROLE_CANDIDATE,
5972 (False, True, False): _ROLE_DRAINED,
5973 (False, False, True): _ROLE_OFFLINE,
5974 (False, False, False): _ROLE_REGULAR,
5976 _R2F = dict((v, k) for k, v in _F2R.items())
5977 _FLAGS = ["master_candidate", "drained", "offline"]
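# Illustration: flags are read in the order (master_candidate, drained,
# offline), so a plain master candidate maps as
#   _F2R[(True, False, False)] == _ROLE_CANDIDATE
# and _R2F inverts the mapping when the new role is written back in Exec.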
5979 def CheckArguments(self):
5980 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5981 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5982 self.op.master_capable, self.op.vm_capable,
5983 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5985 if all_mods.count(None) == len(all_mods):
5986 raise errors.OpPrereqError("Please pass at least one modification",
5988 if all_mods.count(True) > 1:
5989 raise errors.OpPrereqError("Can't set the node into more than one"
5990 " state at the same time",
5993 # Boolean value that tells us whether we might be demoting from MC
5994 self.might_demote = (self.op.master_candidate is False or
5995 self.op.offline is True or
5996 self.op.drained is True or
5997 self.op.master_capable is False)
5999 if self.op.secondary_ip:
6000 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6001 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6002 " address" % self.op.secondary_ip,
6005 self.lock_all = self.op.auto_promote and self.might_demote
6006 self.lock_instances = self.op.secondary_ip is not None
6008 def _InstanceFilter(self, instance):
6009 """Filter for getting affected instances.
6012 return (instance.disk_template in constants.DTS_INT_MIRROR and
6013 self.op.node_name in instance.all_nodes)
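# Only instances that use this node and have an internally mirrored disk
# template (DRBD-style templates) are affected by a secondary IP change, so
# this filter decides which instance locks are needed.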
6015 def ExpandNames(self):
6017 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
6019 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
6021 # Since modifying a node can have severe effects on currently running
6022 # operations the resource lock is at least acquired in shared mode
6023 self.needed_locks[locking.LEVEL_NODE_RES] = \
6024 self.needed_locks[locking.LEVEL_NODE]
6026 # Get node resource and instance locks in shared mode; they are not used
6027 # for anything but read-only access
6028 self.share_locks[locking.LEVEL_NODE_RES] = 1
6029 self.share_locks[locking.LEVEL_INSTANCE] = 1
6031 if self.lock_instances:
6032 self.needed_locks[locking.LEVEL_INSTANCE] = \
6033 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6035 def BuildHooksEnv(self):
6038 This runs on the master node.
6042 "OP_TARGET": self.op.node_name,
6043 "MASTER_CANDIDATE": str(self.op.master_candidate),
6044 "OFFLINE": str(self.op.offline),
6045 "DRAINED": str(self.op.drained),
6046 "MASTER_CAPABLE": str(self.op.master_capable),
6047 "VM_CAPABLE": str(self.op.vm_capable),
6050 def BuildHooksNodes(self):
6051 """Build hooks nodes.
6054 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6057 def CheckPrereq(self):
6058 """Check prerequisites.
6060 This only checks the instance list against the existing names.
6063 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6065 if self.lock_instances:
6066 affected_instances = \
6067 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6069 # Verify instance locks
6070 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6071 wanted_instances = frozenset(affected_instances.keys())
6072 if wanted_instances - owned_instances:
6073 raise errors.OpPrereqError("Instances affected by changing node %s's"
6074 " secondary IP address have changed since"
6075 " locks were acquired, wanted '%s', have"
6076 " '%s'; retry the operation" %
6078 utils.CommaJoin(wanted_instances),
6079 utils.CommaJoin(owned_instances)),
6082 affected_instances = None
6084 if (self.op.master_candidate is not None or
6085 self.op.drained is not None or
6086 self.op.offline is not None):
6087 # we can't change the master's node flags
6088 if self.op.node_name == self.cfg.GetMasterNode():
6089 raise errors.OpPrereqError("The master role can be changed"
6090 " only via master-failover",
6093 if self.op.master_candidate and not node.master_capable:
6094 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6095 " it a master candidate" % node.name,
6098 if self.op.vm_capable is False:
6099 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6101 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6102 " the vm_capable flag" % node.name,
6105 if node.master_candidate and self.might_demote and not self.lock_all:
6106 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6107       # check if after removing the current node, we're missing master candidates
6109 (mc_remaining, mc_should, _) = \
6110 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6111 if mc_remaining < mc_should:
6112 raise errors.OpPrereqError("Not enough master candidates, please"
6113 " pass auto promote option to allow"
6114 " promotion (--auto-promote or RAPI"
6115 " auto_promote=True)", errors.ECODE_STATE)
6117 self.old_flags = old_flags = (node.master_candidate,
6118 node.drained, node.offline)
6119 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6120 self.old_role = old_role = self._F2R[old_flags]
6122 # Check for ineffective changes
6123 for attr in self._FLAGS:
6124 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6125 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6126 setattr(self.op, attr, None)
6128 # Past this point, any flag change to False means a transition
6129 # away from the respective state, as only real changes are kept
6131 # TODO: We might query the real power state if it supports OOB
6132 if _SupportsOob(self.cfg, node):
6133 if self.op.offline is False and not (node.powered or
6134 self.op.powered is True):
6135 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6136 " offline status can be reset") %
6137 self.op.node_name, errors.ECODE_STATE)
6138 elif self.op.powered is not None:
6139 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6140 " as it does not support out-of-band"
6141 " handling") % self.op.node_name,
6144 # If we're being deofflined/drained, we'll MC ourself if needed
6145 if (self.op.drained is False or self.op.offline is False or
6146 (self.op.master_capable and not node.master_capable)):
6147 if _DecideSelfPromotion(self):
6148 self.op.master_candidate = True
6149 self.LogInfo("Auto-promoting node to master candidate")
6151 # If we're no longer master capable, we'll demote ourselves from MC
6152 if self.op.master_capable is False and node.master_candidate:
6153 self.LogInfo("Demoting from master candidate")
6154 self.op.master_candidate = False
6157 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6158 if self.op.master_candidate:
6159 new_role = self._ROLE_CANDIDATE
6160 elif self.op.drained:
6161 new_role = self._ROLE_DRAINED
6162 elif self.op.offline:
6163 new_role = self._ROLE_OFFLINE
6164 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6165 # False is still in new flags, which means we're un-setting (the
6167 new_role = self._ROLE_REGULAR
6168 else: # no new flags, nothing, keep old role
6171 self.new_role = new_role
6173 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6174 # Trying to transition out of offline status
6175 result = self.rpc.call_version([node.name])[node.name]
6177 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6178 " to report its version: %s" %
6179 (node.name, result.fail_msg),
6182 self.LogWarning("Transitioning node from offline to online state"
6183 " without using re-add. Please make sure the node"
6186 # When changing the secondary ip, verify if this is a single-homed to
6187 # multi-homed transition or vice versa, and apply the relevant
6189 if self.op.secondary_ip:
6190 # Ok even without locking, because this can't be changed by any LU
6191 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6192 master_singlehomed = master.secondary_ip == master.primary_ip
6193 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6194 if self.op.force and node.name == master.name:
6195 self.LogWarning("Transitioning from single-homed to multi-homed"
6196 " cluster. All nodes will require a secondary ip.")
6198 raise errors.OpPrereqError("Changing the secondary ip on a"
6199 " single-homed cluster requires the"
6200 " --force option to be passed, and the"
6201 " target node to be the master",
6203 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6204 if self.op.force and node.name == master.name:
6205 self.LogWarning("Transitioning from multi-homed to single-homed"
6206 " cluster. Secondary IPs will have to be removed.")
6208 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6209 " same as the primary IP on a multi-homed"
6210 " cluster, unless the --force option is"
6211 " passed, and the target node is the"
6212 " master", errors.ECODE_INVAL)
6214 assert not (frozenset(affected_instances) -
6215 self.owned_locks(locking.LEVEL_INSTANCE))
6218 if affected_instances:
6219 msg = ("Cannot change secondary IP address: offline node has"
6220 " instances (%s) configured to use it" %
6221 utils.CommaJoin(affected_instances.keys()))
6222 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6224 # On online nodes, check that no instances are running, and that
6225 # the node has the new ip and we can reach it.
6226 for instance in affected_instances.values():
6227 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6228 msg="cannot change secondary ip")
6230 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6231 if master.name != node.name:
6232 # check reachability from master secondary ip to new secondary ip
6233 if not netutils.TcpPing(self.op.secondary_ip,
6234 constants.DEFAULT_NODED_PORT,
6235 source=master.secondary_ip):
6236 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6237 " based ping to node daemon port",
6238 errors.ECODE_ENVIRON)
6240 if self.op.ndparams:
6241 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6242 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6243 self.new_ndparams = new_ndparams
6245 if self.op.hv_state:
6246 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6247 self.node.hv_state_static)
6249 if self.op.disk_state:
6250 self.new_disk_state = \
6251 _MergeAndVerifyDiskState(self.op.disk_state,
6252 self.node.disk_state_static)
6254 def Exec(self, feedback_fn):
6259 old_role = self.old_role
6260 new_role = self.new_role
6264 if self.op.ndparams:
6265 node.ndparams = self.new_ndparams
6267 if self.op.powered is not None:
6268 node.powered = self.op.powered
6270 if self.op.hv_state:
6271 node.hv_state_static = self.new_hv_state
6273 if self.op.disk_state:
6274 node.disk_state_static = self.new_disk_state
6276 for attr in ["master_capable", "vm_capable"]:
6277 val = getattr(self.op, attr)
6279 setattr(node, attr, val)
6280 result.append((attr, str(val)))
6282 if new_role != old_role:
6283 # Tell the node to demote itself, if no longer MC and not offline
6284 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6285 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6287 self.LogWarning("Node failed to demote itself: %s", msg)
6289 new_flags = self._R2F[new_role]
6290 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6292 result.append((desc, str(nf)))
6293 (node.master_candidate, node.drained, node.offline) = new_flags
6295 # we locked all nodes, we adjust the CP before updating this node
6297 _AdjustCandidatePool(self, [node.name])
6299 if self.op.secondary_ip:
6300 node.secondary_ip = self.op.secondary_ip
6301 result.append(("secondary_ip", self.op.secondary_ip))
6303 # this will trigger configuration file update, if needed
6304 self.cfg.Update(node, feedback_fn)
6306 # this will trigger job queue propagation or cleanup if the mc
6308 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6309 self.context.ReaddNode(node)
6314 class LUNodePowercycle(NoHooksLU):
6315 """Powercycles a node.
6320 def CheckArguments(self):
6321 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6322 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6323 raise errors.OpPrereqError("The node is the master and the force"
6324 " parameter was not set",
6327 def ExpandNames(self):
6328 """Locking for PowercycleNode.
6330 This is a last-resort option and shouldn't block on other
6331 jobs. Therefore, we grab no locks.
6334 self.needed_locks = {}
6336 def Exec(self, feedback_fn):
6340 result = self.rpc.call_node_powercycle(self.op.node_name,
6341 self.cfg.GetHypervisorType())
6342 result.Raise("Failed to schedule the reboot")
6343 return result.payload
6346 class LUClusterQuery(NoHooksLU):
6347 """Query cluster configuration.
6352 def ExpandNames(self):
6353 self.needed_locks = {}
6355 def Exec(self, feedback_fn):
6356 """Return cluster config.
6359 cluster = self.cfg.GetClusterInfo()
6362 # Filter just for enabled hypervisors
6363 for os_name, hv_dict in cluster.os_hvp.items():
6364 os_hvp[os_name] = {}
6365 for hv_name, hv_params in hv_dict.items():
6366 if hv_name in cluster.enabled_hypervisors:
6367 os_hvp[os_name][hv_name] = hv_params
6369 # Convert ip_family to ip_version
6370 primary_ip_version = constants.IP4_VERSION
6371 if cluster.primary_ip_family == netutils.IP6Address.family:
6372 primary_ip_version = constants.IP6_VERSION
6375 "software_version": constants.RELEASE_VERSION,
6376 "protocol_version": constants.PROTOCOL_VERSION,
6377 "config_version": constants.CONFIG_VERSION,
6378 "os_api_version": max(constants.OS_API_VERSIONS),
6379 "export_version": constants.EXPORT_VERSION,
6380 "architecture": runtime.GetArchInfo(),
6381 "name": cluster.cluster_name,
6382 "master": cluster.master_node,
6383 "default_hypervisor": cluster.primary_hypervisor,
6384 "enabled_hypervisors": cluster.enabled_hypervisors,
6385 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6386 for hypervisor_name in cluster.enabled_hypervisors]),
6388 "beparams": cluster.beparams,
6389 "osparams": cluster.osparams,
6390 "ipolicy": cluster.ipolicy,
6391 "nicparams": cluster.nicparams,
6392 "ndparams": cluster.ndparams,
6393 "diskparams": cluster.diskparams,
6394 "candidate_pool_size": cluster.candidate_pool_size,
6395 "master_netdev": cluster.master_netdev,
6396 "master_netmask": cluster.master_netmask,
6397 "use_external_mip_script": cluster.use_external_mip_script,
6398 "volume_group_name": cluster.volume_group_name,
6399 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6400 "file_storage_dir": cluster.file_storage_dir,
6401 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6402 "maintain_node_health": cluster.maintain_node_health,
6403 "ctime": cluster.ctime,
6404 "mtime": cluster.mtime,
6405 "uuid": cluster.uuid,
6406 "tags": list(cluster.GetTags()),
6407 "uid_pool": cluster.uid_pool,
6408 "default_iallocator": cluster.default_iallocator,
6409 "reserved_lvs": cluster.reserved_lvs,
6410 "primary_ip_version": primary_ip_version,
6411 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6412 "hidden_os": cluster.hidden_os,
6413 "blacklisted_os": cluster.blacklisted_os,
6419 class LUClusterConfigQuery(NoHooksLU):
6420 """Return configuration values.
6425 def CheckArguments(self):
6426 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6428 def ExpandNames(self):
6429 self.cq.ExpandNames(self)
6431 def DeclareLocks(self, level):
6432 self.cq.DeclareLocks(self, level)
6434 def Exec(self, feedback_fn):
6435 result = self.cq.OldStyleQuery(self)
6437 assert len(result) == 1
6442 class _ClusterQuery(_QueryBase):
6443 FIELDS = query.CLUSTER_FIELDS
6445 #: Do not sort (there is only one item)
6448 def ExpandNames(self, lu):
6449 lu.needed_locks = {}
6451 # The following variables interact with _QueryBase._GetNames
6452 self.wanted = locking.ALL_SET
6453 self.do_locking = self.use_locking
6456 raise errors.OpPrereqError("Can not use locking for cluster queries",
6459 def DeclareLocks(self, lu, level):
6462 def _GetQueryData(self, lu):
6463 """Computes the list of nodes and their attributes.
6466 # Locking is not used
6467 assert not (compat.any(lu.glm.is_owned(level)
6468 for level in locking.LEVELS
6469 if level != locking.LEVEL_CLUSTER) or
6470 self.do_locking or self.use_locking)
6472 if query.CQ_CONFIG in self.requested_data:
6473 cluster = lu.cfg.GetClusterInfo()
6475 cluster = NotImplemented
6477 if query.CQ_QUEUE_DRAINED in self.requested_data:
6478 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6480 drain_flag = NotImplemented
6482 if query.CQ_WATCHER_PAUSE in self.requested_data:
6483 watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6485 watcher_pause = NotImplemented
6487 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
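# Data that was not requested is passed as NotImplemented; only fields that
# were actually selected touch their data, so these placeholders should
# never be serialized into a reply.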
6490 class LUInstanceActivateDisks(NoHooksLU):
6491 """Bring up an instance's disks.
6496 def ExpandNames(self):
6497 self._ExpandAndLockInstance()
6498 self.needed_locks[locking.LEVEL_NODE] = []
6499 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6501 def DeclareLocks(self, level):
6502 if level == locking.LEVEL_NODE:
6503 self._LockInstancesNodes()
6505 def CheckPrereq(self):
6506 """Check prerequisites.
6508 This checks that the instance is in the cluster.
6511 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6512 assert self.instance is not None, \
6513 "Cannot retrieve locked instance %s" % self.op.instance_name
6514 _CheckNodeOnline(self, self.instance.primary_node)
6516 def Exec(self, feedback_fn):
6517 """Activate the disks.
6520 disks_ok, disks_info = \
6521 _AssembleInstanceDisks(self, self.instance,
6522 ignore_size=self.op.ignore_size)
6524 raise errors.OpExecError("Cannot activate block devices")
6526 if self.op.wait_for_sync:
6527 if not _WaitForSync(self, self.instance):
6528 raise errors.OpExecError("Some disks of the instance are degraded!")
6533 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6535 """Prepare the block devices for an instance.
6537 This sets up the block devices on all nodes.
6539 @type lu: L{LogicalUnit}
6540 @param lu: the logical unit on whose behalf we execute
6541 @type instance: L{objects.Instance}
6542 @param instance: the instance for whose disks we assemble
6543 @type disks: list of L{objects.Disk} or None
6544 @param disks: which disks to assemble (or all, if None)
6545 @type ignore_secondaries: boolean
6546 @param ignore_secondaries: if true, errors on secondary nodes
6547 won't result in an error return from the function
6548 @type ignore_size: boolean
6549 @param ignore_size: if true, the current known size of the disk
6550 will not be used during the disk activation, useful for cases
6551 when the size is wrong
6552 @return: False if the operation failed, otherwise a list of
6553 (host, instance_visible_name, node_visible_name)
6554 with the mapping from node devices to instance devices
6559 iname = instance.name
6560 disks = _ExpandCheckDisks(instance, disks)
6562   # With the two-pass mechanism we try to reduce the window of
6563   # opportunity for the race condition of switching DRBD to primary
6564   # before handshaking occurred, but we do not eliminate it
6566 # The proper fix would be to wait (with some limits) until the
6567 # connection has been made and drbd transitions from WFConnection
6568 # into any other network-connected state (Connected, SyncTarget,
6571 # 1st pass, assemble on all nodes in secondary mode
6572 for idx, inst_disk in enumerate(disks):
6573 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6575 node_disk = node_disk.Copy()
6576 node_disk.UnsetSize()
6577 lu.cfg.SetDiskID(node_disk, node)
6578 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6580 msg = result.fail_msg
6582 is_offline_secondary = (node in instance.secondary_nodes and
6584 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6585 " (is_primary=False, pass=1): %s",
6586 inst_disk.iv_name, node, msg)
6587 if not (ignore_secondaries or is_offline_secondary):
6590 # FIXME: race condition on drbd migration to primary
6592 # 2nd pass, do only the primary node
6593 for idx, inst_disk in enumerate(disks):
6596 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6597 if node != instance.primary_node:
6600 node_disk = node_disk.Copy()
6601 node_disk.UnsetSize()
6602 lu.cfg.SetDiskID(node_disk, node)
6603 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6605 msg = result.fail_msg
6607 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6608 " (is_primary=True, pass=2): %s",
6609 inst_disk.iv_name, node, msg)
6612 dev_path = result.payload
6614 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6616 # leave the disks configured for the primary node
6617   # this is a workaround that would be better fixed by
6618 # improving the logical/physical id handling
6620 lu.cfg.SetDiskID(disk, instance.primary_node)
6622 return disks_ok, device_info
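# Typical use by callers such as _StartInstanceDisks below (illustrative):
#   disks_ok, _ = _AssembleInstanceDisks(lu, instance, ignore_secondaries=force)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
# where the second return value is the (node, iv_name, device_path) list
# described in the docstring.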
6625 def _StartInstanceDisks(lu, instance, force):
6626 """Start the disks of an instance.
6629 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6630 ignore_secondaries=force)
6632 _ShutdownInstanceDisks(lu, instance)
6633 if force is not None and not force:
6634 lu.proc.LogWarning("", hint="If the message above refers to a"
6636 " you can retry the operation using '--force'.")
6637 raise errors.OpExecError("Disk consistency error")
6640 class LUInstanceDeactivateDisks(NoHooksLU):
6641 """Shutdown an instance's disks.
6646 def ExpandNames(self):
6647 self._ExpandAndLockInstance()
6648 self.needed_locks[locking.LEVEL_NODE] = []
6649 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6651 def DeclareLocks(self, level):
6652 if level == locking.LEVEL_NODE:
6653 self._LockInstancesNodes()
6655 def CheckPrereq(self):
6656 """Check prerequisites.
6658 This checks that the instance is in the cluster.
6661 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6662 assert self.instance is not None, \
6663 "Cannot retrieve locked instance %s" % self.op.instance_name
6665 def Exec(self, feedback_fn):
6666 """Deactivate the disks
6669 instance = self.instance
6671 _ShutdownInstanceDisks(self, instance)
6673 _SafeShutdownInstanceDisks(self, instance)
6676 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6677 """Shutdown block devices of an instance.
6679 This function checks if an instance is running, before calling
6680 _ShutdownInstanceDisks.
6683 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6684 _ShutdownInstanceDisks(lu, instance, disks=disks)
6687 def _ExpandCheckDisks(instance, disks):
6688 """Return the instance disks selected by the disks list
6690 @type disks: list of L{objects.Disk} or None
6691 @param disks: selected disks
6692 @rtype: list of L{objects.Disk}
6693 @return: selected instance disks to act on
6697 return instance.disks
6699 if not set(disks).issubset(instance.disks):
6700 raise errors.ProgrammerError("Can only act on disks belonging to the"
6705 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6706 """Shutdown block devices of an instance.
6708 This does the shutdown on all nodes of the instance.
6710   If ignore_primary is false, errors on the primary node are ignored.
6715 disks = _ExpandCheckDisks(instance, disks)
6718 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6719 lu.cfg.SetDiskID(top_disk, node)
6720 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6721 msg = result.fail_msg
6723         lu.LogWarning("Could not shut down block device %s on node %s: %s",
6724 disk.iv_name, node, msg)
6725 if ((node == instance.primary_node and not ignore_primary) or
6726 (node != instance.primary_node and not result.offline)):
6731 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6732 """Checks if a node has enough free memory.
6734   This function checks if a given node has the needed amount of free
6735   memory. In case the node has less memory or we cannot get the
6736   information from the node, this function raises an OpPrereqError
6739 @type lu: C{LogicalUnit}
6740 @param lu: a logical unit from which we get configuration data
6742 @param node: the node to check
6743 @type reason: C{str}
6744 @param reason: string to use in the error message
6745 @type requested: C{int}
6746 @param requested: the amount of memory in MiB to check for
6747 @type hypervisor_name: C{str}
6748 @param hypervisor_name: the hypervisor to ask for memory stats
6750 @return: node current free memory
6751 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6752 we cannot check the node
6755 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6756 nodeinfo[node].Raise("Can't get data from node %s" % node,
6757 prereq=True, ecode=errors.ECODE_ENVIRON)
6758 (_, _, (hv_info, )) = nodeinfo[node].payload
6760 free_mem = hv_info.get("memory_free", None)
6761 if not isinstance(free_mem, int):
6762 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6763 " was '%s'" % (node, free_mem),
6764 errors.ECODE_ENVIRON)
6765 if requested > free_mem:
6766 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6767 " needed %s MiB, available %s MiB" %
6768 (node, reason, requested, free_mem),
6773 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6774 """Checks if nodes have enough free disk space in the all VGs.
6776   This function checks if all given nodes have the needed amount of
6777   free disk. In case any node has less disk or we cannot get the
6778   information from the node, this function raises an OpPrereqError
6781 @type lu: C{LogicalUnit}
6782 @param lu: a logical unit from which we get configuration data
6783 @type nodenames: C{list}
6784 @param nodenames: the list of node names to check
6785 @type req_sizes: C{dict}
6786 @param req_sizes: the hash of vg and corresponding amount of disk in
6788 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6789 or we cannot check the node
6792 for vg, req_size in req_sizes.items():
6793 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
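# Example (hypothetical values): req_sizes = {"xenvg": 10240} verifies that
# every node in nodenames reports at least 10 GiB of free space in the
# volume group "xenvg".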
6796 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6797 """Checks if nodes have enough free disk space in the specified VG.
6799   This function checks if all given nodes have the needed amount of
6800   free disk. In case any node has less disk or we cannot get the
6801   information from the node, this function raises an OpPrereqError
6804 @type lu: C{LogicalUnit}
6805 @param lu: a logical unit from which we get configuration data
6806 @type nodenames: C{list}
6807 @param nodenames: the list of node names to check
6809 @param vg: the volume group to check
6810 @type requested: C{int}
6811 @param requested: the amount of disk in MiB to check for
6812 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6813 or we cannot check the node
6816 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6817 for node in nodenames:
6818 info = nodeinfo[node]
6819 info.Raise("Cannot get current information from node %s" % node,
6820 prereq=True, ecode=errors.ECODE_ENVIRON)
6821 (_, (vg_info, ), _) = info.payload
6822 vg_free = vg_info.get("vg_free", None)
6823 if not isinstance(vg_free, int):
6824 raise errors.OpPrereqError("Can't compute free disk space on node"
6825 " %s for vg %s, result was '%s'" %
6826 (node, vg, vg_free), errors.ECODE_ENVIRON)
6827 if requested > vg_free:
6828 raise errors.OpPrereqError("Not enough disk space on target node %s"
6829 " vg %s: required %d MiB, available %d MiB" %
6830 (node, vg, requested, vg_free),
6834 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6835 """Checks if nodes have enough physical CPUs
6837 This function checks if all given nodes have the needed number of
6838   physical CPUs. In case any node has fewer CPUs or we cannot get the
6839 information from the node, this function raises an OpPrereqError
6842 @type lu: C{LogicalUnit}
6843 @param lu: a logical unit from which we get configuration data
6844 @type nodenames: C{list}
6845 @param nodenames: the list of node names to check
6846 @type requested: C{int}
6847 @param requested: the minimum acceptable number of physical CPUs
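@type hypervisor_name: C{str}
@param hypervisor_name: the hypervisor to ask for CPU information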
6848 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6849 or we cannot check the node
6852 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6853 for node in nodenames:
6854 info = nodeinfo[node]
6855 info.Raise("Cannot get current information from node %s" % node,
6856 prereq=True, ecode=errors.ECODE_ENVIRON)
6857 (_, _, (hv_info, )) = info.payload
6858 num_cpus = hv_info.get("cpu_total", None)
6859 if not isinstance(num_cpus, int):
6860 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6861 " on node %s, result was '%s'" %
6862 (node, num_cpus), errors.ECODE_ENVIRON)
6863 if requested > num_cpus:
6864 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6865 "required" % (node, num_cpus, requested),
6869 class LUInstanceStartup(LogicalUnit):
6870 """Starts an instance.
6873 HPATH = "instance-start"
6874 HTYPE = constants.HTYPE_INSTANCE
6877 def CheckArguments(self):
6879 if self.op.beparams:
6880 # fill the beparams dict
6881 objects.UpgradeBeParams(self.op.beparams)
6882 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6884 def ExpandNames(self):
6885 self._ExpandAndLockInstance()
6886 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6888 def DeclareLocks(self, level):
6889 if level == locking.LEVEL_NODE_RES:
6890 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6892 def BuildHooksEnv(self):
6895 This runs on master, primary and secondary nodes of the instance.
6899 "FORCE": self.op.force,
6902 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6906 def BuildHooksNodes(self):
6907 """Build hooks nodes.
6910 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6913 def CheckPrereq(self):
6914 """Check prerequisites.
6916 This checks that the instance is in the cluster.
6919 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6920 assert self.instance is not None, \
6921 "Cannot retrieve locked instance %s" % self.op.instance_name
6924 if self.op.hvparams:
6925 # check hypervisor parameter syntax (locally)
6926 cluster = self.cfg.GetClusterInfo()
6927 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6928 filled_hvp = cluster.FillHV(instance)
6929 filled_hvp.update(self.op.hvparams)
6930 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6931 hv_type.CheckParameterSyntax(filled_hvp)
6932 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6934 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6936 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6938 if self.primary_offline and self.op.ignore_offline_nodes:
6939 self.proc.LogWarning("Ignoring offline primary node")
6941 if self.op.hvparams or self.op.beparams:
6942 self.proc.LogWarning("Overridden parameters are ignored")
6944 _CheckNodeOnline(self, instance.primary_node)
6946 bep = self.cfg.GetClusterInfo().FillBE(instance)
6947 bep.update(self.op.beparams)
6949 # check bridges existence
6950 _CheckInstanceBridgesExist(self, instance)
6952 remote_info = self.rpc.call_instance_info(instance.primary_node,
6954 instance.hypervisor)
6955 remote_info.Raise("Error checking node %s" % instance.primary_node,
6956 prereq=True, ecode=errors.ECODE_ENVIRON)
6957 if not remote_info.payload: # not running already
6958 _CheckNodeFreeMemory(self, instance.primary_node,
6959 "starting instance %s" % instance.name,
6960 bep[constants.BE_MINMEM], instance.hypervisor)
6962 def Exec(self, feedback_fn):
6963 """Start the instance.
6966 instance = self.instance
6967 force = self.op.force
6969 if not self.op.no_remember:
6970 self.cfg.MarkInstanceUp(instance.name)
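# The desired state is recorded in the configuration before the actual start
# (unless no_remember was given); if the RPC below fails, the disks are shut
# down again and the error is propagated while the instance stays marked up.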
6972 if self.primary_offline:
6973 assert self.op.ignore_offline_nodes
6974 self.proc.LogInfo("Primary node offline, marked instance as started")
6976 node_current = instance.primary_node
6978 _StartInstanceDisks(self, instance, force)
6981 self.rpc.call_instance_start(node_current,
6982 (instance, self.op.hvparams,
6984 self.op.startup_paused)
6985 msg = result.fail_msg
6987 _ShutdownInstanceDisks(self, instance)
6988 raise errors.OpExecError("Could not start instance: %s" % msg)
6991 class LUInstanceReboot(LogicalUnit):
6992 """Reboot an instance.
6995 HPATH = "instance-reboot"
6996 HTYPE = constants.HTYPE_INSTANCE
6999 def ExpandNames(self):
7000 self._ExpandAndLockInstance()
7002 def BuildHooksEnv(self):
7005 This runs on master, primary and secondary nodes of the instance.
7009 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7010 "REBOOT_TYPE": self.op.reboot_type,
7011 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7014 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7018 def BuildHooksNodes(self):
7019 """Build hooks nodes.
7022 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7025 def CheckPrereq(self):
7026 """Check prerequisites.
7028 This checks that the instance is in the cluster.
7031 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7032 assert self.instance is not None, \
7033 "Cannot retrieve locked instance %s" % self.op.instance_name
7034 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7035 _CheckNodeOnline(self, instance.primary_node)
7037 # check bridges existence
7038 _CheckInstanceBridgesExist(self, instance)
7040 def Exec(self, feedback_fn):
7041 """Reboot the instance.
7044 instance = self.instance
7045 ignore_secondaries = self.op.ignore_secondaries
7046 reboot_type = self.op.reboot_type
7048 remote_info = self.rpc.call_instance_info(instance.primary_node,
7050 instance.hypervisor)
7051 remote_info.Raise("Error checking node %s" % instance.primary_node)
7052 instance_running = bool(remote_info.payload)
7054 node_current = instance.primary_node
7056 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7057 constants.INSTANCE_REBOOT_HARD]:
7058 for disk in instance.disks:
7059 self.cfg.SetDiskID(disk, node_current)
7060 result = self.rpc.call_instance_reboot(node_current, instance,
7062 self.op.shutdown_timeout)
7063 result.Raise("Could not reboot instance")
7065 if instance_running:
7066 result = self.rpc.call_instance_shutdown(node_current, instance,
7067 self.op.shutdown_timeout)
7068       result.Raise("Could not shut down instance for full reboot")
7069 _ShutdownInstanceDisks(self, instance)
7071 self.LogInfo("Instance %s was already stopped, starting now",
7073 _StartInstanceDisks(self, instance, ignore_secondaries)
7074 result = self.rpc.call_instance_start(node_current,
7075 (instance, None, None), False)
7076 msg = result.fail_msg
7078 _ShutdownInstanceDisks(self, instance)
7079 raise errors.OpExecError("Could not start instance for"
7080 " full reboot: %s" % msg)
7082 self.cfg.MarkInstanceUp(instance.name)
7085 class LUInstanceShutdown(LogicalUnit):
7086 """Shutdown an instance.
7089 HPATH = "instance-stop"
7090 HTYPE = constants.HTYPE_INSTANCE
7093 def ExpandNames(self):
7094 self._ExpandAndLockInstance()
7096 def BuildHooksEnv(self):
7099 This runs on master, primary and secondary nodes of the instance.
7102 env = _BuildInstanceHookEnvByObject(self, self.instance)
7103 env["TIMEOUT"] = self.op.timeout
7106 def BuildHooksNodes(self):
7107 """Build hooks nodes.
7110 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7113 def CheckPrereq(self):
7114 """Check prerequisites.
7116 This checks that the instance is in the cluster.
7119 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7120 assert self.instance is not None, \
7121 "Cannot retrieve locked instance %s" % self.op.instance_name
7123 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7125 self.primary_offline = \
7126 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7128 if self.primary_offline and self.op.ignore_offline_nodes:
7129 self.proc.LogWarning("Ignoring offline primary node")
7131 _CheckNodeOnline(self, self.instance.primary_node)
7133 def Exec(self, feedback_fn):
7134 """Shutdown the instance.
7137 instance = self.instance
7138 node_current = instance.primary_node
7139 timeout = self.op.timeout
7141 if not self.op.no_remember:
7142 self.cfg.MarkInstanceDown(instance.name)
7144 if self.primary_offline:
7145 assert self.op.ignore_offline_nodes
7146 self.proc.LogInfo("Primary node offline, marked instance as stopped")
7148 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7149 msg = result.fail_msg
7151       self.proc.LogWarning("Could not shut down instance: %s" % msg)
7153 _ShutdownInstanceDisks(self, instance)
7156 class LUInstanceReinstall(LogicalUnit):
7157 """Reinstall an instance.
7160 HPATH = "instance-reinstall"
7161 HTYPE = constants.HTYPE_INSTANCE
7164 def ExpandNames(self):
7165 self._ExpandAndLockInstance()
7167 def BuildHooksEnv(self):
7170 This runs on master, primary and secondary nodes of the instance.
7173 return _BuildInstanceHookEnvByObject(self, self.instance)
7175 def BuildHooksNodes(self):
7176 """Build hooks nodes.
7179 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7182 def CheckPrereq(self):
7183 """Check prerequisites.
7185 This checks that the instance is in the cluster and is not running.
7188 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7189 assert instance is not None, \
7190 "Cannot retrieve locked instance %s" % self.op.instance_name
7191 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7192 " offline, cannot reinstall")
7194 if instance.disk_template == constants.DT_DISKLESS:
7195 raise errors.OpPrereqError("Instance '%s' has no disks" %
7196 self.op.instance_name,
7198 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7200 if self.op.os_type is not None:
7202 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7203 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7204 instance_os = self.op.os_type
7206 instance_os = instance.os
7208 nodelist = list(instance.all_nodes)
7210 if self.op.osparams:
7211 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7212 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7213 self.os_inst = i_osdict # the new dict (without defaults)
7217 self.instance = instance
7219 def Exec(self, feedback_fn):
7220 """Reinstall the instance.
7223 inst = self.instance
7225 if self.op.os_type is not None:
7226 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7227 inst.os = self.op.os_type
7228 # Write to configuration
7229 self.cfg.Update(inst, feedback_fn)
7231 _StartInstanceDisks(self, inst, None)
7233 feedback_fn("Running the instance OS create scripts...")
7234 # FIXME: pass debug option from opcode to backend
7235 result = self.rpc.call_instance_os_add(inst.primary_node,
7236 (inst, self.os_inst), True,
7237 self.op.debug_level)
7238 result.Raise("Could not install OS for instance %s on node %s" %
7239 (inst.name, inst.primary_node))
7241 _ShutdownInstanceDisks(self, inst)
7244 class LUInstanceRecreateDisks(LogicalUnit):
7245 """Recreate an instance's missing disks.
7248 HPATH = "instance-recreate-disks"
7249 HTYPE = constants.HTYPE_INSTANCE
7252 _MODIFYABLE = frozenset([
7253 constants.IDISK_SIZE,
7254 constants.IDISK_MODE,
7257 # New or changed disk parameters may have different semantics
7258 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7259 constants.IDISK_ADOPT,
7261 # TODO: Implement support changing VG while recreating
7263 constants.IDISK_METAVG,
7266 def _RunAllocator(self):
7267 """Run the allocator based on input opcode.
7270 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7273 # The allocator should actually run in "relocate" mode, but current
7274 # allocators don't support relocating all the nodes of an instance at
7275 # the same time. As a workaround we use "allocate" mode, but this is
7276 # suboptimal for two reasons:
7277 # - The instance name passed to the allocator is present in the list of
7278 # existing instances, so there could be a conflict within the
7279 # internal structures of the allocator. This doesn't happen with the
7280 # current allocators, but it's a liability.
7281 # - The allocator counts the resources used by the instance twice: once
7282 # because the instance exists already, and once because it tries to
7283 # allocate a new instance.
7284 # The allocator could choose some of the nodes on which the instance is
7285 # running, but that's not a problem. If the instance nodes are broken,
7286     # they should already be marked as drained or offline, and hence
7287 # skipped by the allocator. If instance disks have been lost for other
7288 # reasons, then recreating the disks on the same nodes should be fine.
7289 disk_template = self.instance.disk_template
7290 spindle_use = be_full[constants.BE_SPINDLE_USE]
7291 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7292 disk_template=disk_template,
7293 tags=list(self.instance.GetTags()),
7294 os=self.instance.os,
7296 vcpus=be_full[constants.BE_VCPUS],
7297 memory=be_full[constants.BE_MAXMEM],
7298 spindle_use=spindle_use,
7299 disks=[{constants.IDISK_SIZE: d.size,
7300 constants.IDISK_MODE: d.mode}
7301 for d in self.instance.disks],
7302 hypervisor=self.instance.hypervisor)
7303 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7305 ial.Run(self.op.iallocator)
7307 assert req.RequiredNodes() == len(self.instance.all_nodes)
7310 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7311 " %s" % (self.op.iallocator, ial.info),
7314 self.op.nodes = ial.result
7315 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7316 self.op.instance_name, self.op.iallocator,
7317 utils.CommaJoin(ial.result))
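# On success ial.result is the list of node names chosen by the allocator;
# it is stored as op.nodes above, so the rest of the LU behaves exactly as
# if the nodes had been specified explicitly.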
7319 def CheckArguments(self):
7320 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7321 # Normalize and convert deprecated list of disk indices
7322 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7324 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7326 raise errors.OpPrereqError("Some disks have been specified more than"
7327 " once: %s" % utils.CommaJoin(duplicates),
7330 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7331 # when neither iallocator nor nodes are specified
7332 if self.op.iallocator or self.op.nodes:
7333 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7335 for (idx, params) in self.op.disks:
7336 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7337 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7339 raise errors.OpPrereqError("Parameters for disk %s try to change"
7340 " unmodifyable parameter(s): %s" %
7341 (idx, utils.CommaJoin(unsupported)),
7344 def ExpandNames(self):
7345 self._ExpandAndLockInstance()
7346 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7348 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7349 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7351 self.needed_locks[locking.LEVEL_NODE] = []
7352 if self.op.iallocator:
7353 # iallocator will select a new node in the same group
7354 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7355 self.needed_locks[locking.LEVEL_NODE_RES] = []
7357 def DeclareLocks(self, level):
7358 if level == locking.LEVEL_NODEGROUP:
7359 assert self.op.iallocator is not None
7360 assert not self.op.nodes
7361 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7362 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7363 # Lock the primary group used by the instance optimistically; this
7364 # requires going via the node before it's locked, requiring
7365 # verification later on
7366 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7367 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7369 elif level == locking.LEVEL_NODE:
7370 # If an allocator is used, then we lock all the nodes in the current
7371 # instance group, as we don't know yet which ones will be selected;
7372 # if we replace the nodes without using an allocator, locks are
7373 # already declared in ExpandNames; otherwise, we need to lock all the
7374 # instance nodes for disk re-creation
7375 if self.op.iallocator:
7376 assert not self.op.nodes
7377 assert not self.needed_locks[locking.LEVEL_NODE]
7378 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7380 # Lock member nodes of the group of the primary node
7381 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7382 self.needed_locks[locking.LEVEL_NODE].extend(
7383 self.cfg.GetNodeGroup(group_uuid).members)
7384 elif not self.op.nodes:
7385 self._LockInstancesNodes(primary_only=False)
7386 elif level == locking.LEVEL_NODE_RES:
7388 self.needed_locks[locking.LEVEL_NODE_RES] = \
7389 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7391 def BuildHooksEnv(self):
7394 This runs on master, primary and secondary nodes of the instance.
7397 return _BuildInstanceHookEnvByObject(self, self.instance)
7399 def BuildHooksNodes(self):
7400 """Build hooks nodes.
7403 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7406 def CheckPrereq(self):
7407 """Check prerequisites.
7409 This checks that the instance is in the cluster and is not running.
7412 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7413 assert instance is not None, \
7414 "Cannot retrieve locked instance %s" % self.op.instance_name
7416 if len(self.op.nodes) != len(instance.all_nodes):
7417 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7418 " %d replacement nodes were specified" %
7419 (instance.name, len(instance.all_nodes),
7420 len(self.op.nodes)),
7422 assert instance.disk_template != constants.DT_DRBD8 or \
7423 len(self.op.nodes) == 2
7424 assert instance.disk_template != constants.DT_PLAIN or \
7425 len(self.op.nodes) == 1
7426 primary_node = self.op.nodes[0]
7428 primary_node = instance.primary_node
7429 if not self.op.iallocator:
7430 _CheckNodeOnline(self, primary_node)
7432 if instance.disk_template == constants.DT_DISKLESS:
7433 raise errors.OpPrereqError("Instance '%s' has no disks" %
7434 self.op.instance_name, errors.ECODE_INVAL)
7436 # Verify if node group locks are still correct
7437 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7439 # Node group locks are acquired only for the primary node (and only
7440 # when the allocator is used)
7441 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7444 # if we replace nodes *and* the old primary is offline, we don't
7445 # check the instance state
7446 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7447 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7448 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7449 msg="cannot recreate disks")
7452 self.disks = dict(self.op.disks)
7454 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7456 maxidx = max(self.disks.keys())
7457 if maxidx >= len(instance.disks):
7458 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7461 if ((self.op.nodes or self.op.iallocator) and
7462 sorted(self.disks.keys()) != range(len(instance.disks))):
7463 raise errors.OpPrereqError("Can't recreate disks partially and"
7464 " change the nodes at the same time",
7467 self.instance = instance
7469 if self.op.iallocator:
7470 self._RunAllocator()
7471 # Release unneeded node and node resource locks
7472 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7473 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7475 def Exec(self, feedback_fn):
7476 """Recreate the disks.
7479 instance = self.instance
7481 assert (self.owned_locks(locking.LEVEL_NODE) ==
7482 self.owned_locks(locking.LEVEL_NODE_RES))
7485 mods = [] # keeps track of needed changes
7487 for idx, disk in enumerate(instance.disks):
7489 changes = self.disks[idx]
7491 # Disk should not be recreated
7495 # update secondaries for disks, if needed
7496 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7497 # need to update the nodes and minors
7498 assert len(self.op.nodes) == 2
7499 assert len(disk.logical_id) == 6 # otherwise disk internals
7501 (_, _, old_port, _, _, old_secret) = disk.logical_id
7502 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7503 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7504 new_minors[0], new_minors[1], old_secret)
7505 assert len(disk.logical_id) == len(new_id)
7509 mods.append((idx, new_id, changes))
7511 # now that we have passed all asserts above, we can apply the mods
7512 # in a single run (to avoid partial changes)
7513 for idx, new_id, changes in mods:
7514 disk = instance.disks[idx]
7515 if new_id is not None:
7516 assert disk.dev_type == constants.LD_DRBD8
7517 disk.logical_id = new_id
7519 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7520 mode=changes.get(constants.IDISK_MODE, None))
7522 # change primary node, if needed
7524 instance.primary_node = self.op.nodes[0]
7525 self.LogWarning("Changing the instance's nodes, you will have to"
7526 " remove any disks left on the older nodes manually")
7529 self.cfg.Update(instance, feedback_fn)
7531 # All touched nodes must be locked
7532 mylocks = self.owned_locks(locking.LEVEL_NODE)
7533 assert mylocks.issuperset(frozenset(instance.all_nodes))
7534 _CreateDisks(self, instance, to_skip=to_skip)
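# The loop above rebuilds a DRBD8 disk's logical_id when the instance is
# moved to new nodes: the TCP port and shared secret are preserved, while
# the node pair and the per-node minors are replaced. A minimal sketch of
# that 6-tuple rewrite follows; the helper and all example values are
# purely illustrative and are not used anywhere else in this module.
def _ExampleRewriteDrbdLogicalId(old_id, new_nodes, new_minors):
  """Sketch: rebuild (nodeA, nodeB, port, minorA, minorB, secret)."""
  (_, _, old_port, _, _, old_secret) = old_id
  return (new_nodes[0], new_nodes[1], old_port,
          new_minors[0], new_minors[1], old_secret)

# For instance (made-up values):
#   _ExampleRewriteDrbdLogicalId(("node1", "node2", 11000, 0, 1, "s3cr3t"),
#                                ["node3", "node4"], [4, 5])
# returns ("node3", "node4", 11000, 4, 5, "s3cr3t").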
7537 class LUInstanceRename(LogicalUnit):
7538 """Rename an instance.
7541 HPATH = "instance-rename"
7542 HTYPE = constants.HTYPE_INSTANCE
7544 def CheckArguments(self):
7548 if self.op.ip_check and not self.op.name_check:
7549 # TODO: make the ip check more flexible and not depend on the name check
7550 raise errors.OpPrereqError("IP address check requires a name check",
7553 def BuildHooksEnv(self):
7556 This runs on master, primary and secondary nodes of the instance.
7559 env = _BuildInstanceHookEnvByObject(self, self.instance)
7560 env["INSTANCE_NEW_NAME"] = self.op.new_name
7563 def BuildHooksNodes(self):
7564 """Build hooks nodes.
7567 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7570 def CheckPrereq(self):
7571 """Check prerequisites.
7573 This checks that the instance is in the cluster and is not running.
7576 self.op.instance_name = _ExpandInstanceName(self.cfg,
7577 self.op.instance_name)
7578 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7579 assert instance is not None
7580 _CheckNodeOnline(self, instance.primary_node)
7581 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7582 msg="cannot rename")
7583 self.instance = instance
7585 new_name = self.op.new_name
7586 if self.op.name_check:
7587 hostname = _CheckHostnameSane(self, new_name)
7588 new_name = self.op.new_name = hostname.name
7589 if (self.op.ip_check and
7590 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7591 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7592 (hostname.ip, new_name),
7593 errors.ECODE_NOTUNIQUE)
7595 instance_list = self.cfg.GetInstanceList()
7596 if new_name in instance_list and new_name != instance.name:
7597 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7598 new_name, errors.ECODE_EXISTS)
7600 def Exec(self, feedback_fn):
7601 """Rename the instance.
7604 inst = self.instance
7605 old_name = inst.name
7607 rename_file_storage = False
7608 if (inst.disk_template in constants.DTS_FILEBASED and
7609 self.op.new_name != inst.name):
7610 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7611 rename_file_storage = True
7613 self.cfg.RenameInstance(inst.name, self.op.new_name)
7614 # Change the instance lock. This is definitely safe while we hold the BGL.
7615 # Otherwise the new lock would have to be added in acquired mode.
7617 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7618 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7620 # re-read the instance from the configuration after rename
7621 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7623 if rename_file_storage:
7624 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7625 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7626 old_file_storage_dir,
7627 new_file_storage_dir)
7628 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7629 " (but the instance has been renamed in Ganeti)" %
7630 (inst.primary_node, old_file_storage_dir,
7631 new_file_storage_dir))
7633 _StartInstanceDisks(self, inst, None)
7634 # update info on disks
7635 info = _GetInstanceInfoText(inst)
7636 for (idx, disk) in enumerate(inst.disks):
7637 for node in inst.all_nodes:
7638 self.cfg.SetDiskID(disk, node)
7639 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7641 self.LogWarning("Error setting info on node %s for disk %s: %s",
7642 node, idx, result.fail_msg)
7644 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7645 old_name, self.op.debug_level)
7646 msg = result.fail_msg
7648 msg = ("Could not run OS rename script for instance %s on node %s"
7649 " (but the instance has been renamed in Ganeti): %s" %
7650 (inst.name, inst.primary_node, msg))
7651 self.proc.LogWarning(msg)
7653 _ShutdownInstanceDisks(self, inst)
7658 class LUInstanceRemove(LogicalUnit):
7659 """Remove an instance.
7662 HPATH = "instance-remove"
7663 HTYPE = constants.HTYPE_INSTANCE
7666 def ExpandNames(self):
7667 self._ExpandAndLockInstance()
7668 self.needed_locks[locking.LEVEL_NODE] = []
7669 self.needed_locks[locking.LEVEL_NODE_RES] = []
7670 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7672 def DeclareLocks(self, level):
7673 if level == locking.LEVEL_NODE:
7674 self._LockInstancesNodes()
7675 elif level == locking.LEVEL_NODE_RES:
7677 self.needed_locks[locking.LEVEL_NODE_RES] = \
7678 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7680 def BuildHooksEnv(self):
7683 This runs on master, primary and secondary nodes of the instance.
7686 env = _BuildInstanceHookEnvByObject(self, self.instance)
7687 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7690 def BuildHooksNodes(self):
7691 """Build hooks nodes.
7694 nl = [self.cfg.GetMasterNode()]
7695 nl_post = list(self.instance.all_nodes) + nl
7696 return (nl, nl_post)
7698 def CheckPrereq(self):
7699 """Check prerequisites.
7701 This checks that the instance is in the cluster.
7704 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7705 assert self.instance is not None, \
7706 "Cannot retrieve locked instance %s" % self.op.instance_name
7708 def Exec(self, feedback_fn):
7709 """Remove the instance.
7712 instance = self.instance
7713 logging.info("Shutting down instance %s on node %s",
7714 instance.name, instance.primary_node)
7716 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7717 self.op.shutdown_timeout)
7718 msg = result.fail_msg
7720 if self.op.ignore_failures:
7721 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7723 raise errors.OpExecError("Could not shutdown instance %s on"
7725 (instance.name, instance.primary_node, msg))
7727 assert (self.owned_locks(locking.LEVEL_NODE) ==
7728 self.owned_locks(locking.LEVEL_NODE_RES))
7729 assert not (set(instance.all_nodes) -
7730 self.owned_locks(locking.LEVEL_NODE)), \
7731 "Not owning correct locks"
7733 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7736 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7737 """Utility function to remove an instance.
7740 logging.info("Removing block devices for instance %s", instance.name)
7742 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7743 if not ignore_failures:
7744 raise errors.OpExecError("Can't remove instance's disks")
7745 feedback_fn("Warning: can't remove instance's disks")
7747 logging.info("Removing instance %s out of cluster config", instance.name)
7749 lu.cfg.RemoveInstance(instance.name)
7751 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7752 "Instance lock removal conflict"
7754 # Remove lock for the instance
7755 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7758 class LUInstanceQuery(NoHooksLU):
7759 """Logical unit for querying instances.
7762 # pylint: disable=W0142
7765 def CheckArguments(self):
7766 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7767 self.op.output_fields, self.op.use_locking)
7769 def ExpandNames(self):
7770 self.iq.ExpandNames(self)
7772 def DeclareLocks(self, level):
7773 self.iq.DeclareLocks(self, level)
7775 def Exec(self, feedback_fn):
7776 return self.iq.OldStyleQuery(self)
7779 class LUInstanceFailover(LogicalUnit):
7780 """Failover an instance.
7783 HPATH = "instance-failover"
7784 HTYPE = constants.HTYPE_INSTANCE
7787 def CheckArguments(self):
7788 """Check the arguments.
7791 self.iallocator = getattr(self.op, "iallocator", None)
7792 self.target_node = getattr(self.op, "target_node", None)
7794 def ExpandNames(self):
7795 self._ExpandAndLockInstance()
7797 if self.op.target_node is not None:
7798 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7800 self.needed_locks[locking.LEVEL_NODE] = []
7801 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7803 self.needed_locks[locking.LEVEL_NODE_RES] = []
7804 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7806 ignore_consistency = self.op.ignore_consistency
7807 shutdown_timeout = self.op.shutdown_timeout
7808 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7811 ignore_consistency=ignore_consistency,
7812 shutdown_timeout=shutdown_timeout,
7813 ignore_ipolicy=self.op.ignore_ipolicy)
7814 self.tasklets = [self._migrater]
7816 def DeclareLocks(self, level):
7817 if level == locking.LEVEL_NODE:
7818 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7819 if instance.disk_template in constants.DTS_EXT_MIRROR:
7820 if self.op.target_node is None:
7821 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7823 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7824 self.op.target_node]
7825 del self.recalculate_locks[locking.LEVEL_NODE]
7827 self._LockInstancesNodes()
7828 elif level == locking.LEVEL_NODE_RES:
7830 self.needed_locks[locking.LEVEL_NODE_RES] = \
7831 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7833 def BuildHooksEnv(self):
7836 This runs on master, primary and secondary nodes of the instance.
7839 instance = self._migrater.instance
7840 source_node = instance.primary_node
7841 target_node = self.op.target_node
7843 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7844 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7845 "OLD_PRIMARY": source_node,
7846 "NEW_PRIMARY": target_node,
7849 if instance.disk_template in constants.DTS_INT_MIRROR:
7850 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7851 env["NEW_SECONDARY"] = source_node
7853 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7855 env.update(_BuildInstanceHookEnvByObject(self, instance))
7859 def BuildHooksNodes(self):
7860 """Build hooks nodes.
7863 instance = self._migrater.instance
7864 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7865 return (nl, nl + [instance.primary_node])
7868 class LUInstanceMigrate(LogicalUnit):
7869 """Migrate an instance.
7871   This is migration without shutting the instance down, as opposed to
7872   failover, which is done with a shutdown.
7875 HPATH = "instance-migrate"
7876 HTYPE = constants.HTYPE_INSTANCE
7879 def ExpandNames(self):
7880 self._ExpandAndLockInstance()
7882 if self.op.target_node is not None:
7883 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7885 self.needed_locks[locking.LEVEL_NODE] = []
7886 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7888 self.needed_locks[locking.LEVEL_NODE] = []
7889 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7892 TLMigrateInstance(self, self.op.instance_name,
7893 cleanup=self.op.cleanup,
7895 fallback=self.op.allow_failover,
7896 allow_runtime_changes=self.op.allow_runtime_changes,
7897 ignore_ipolicy=self.op.ignore_ipolicy)
7898 self.tasklets = [self._migrater]
7900 def DeclareLocks(self, level):
7901 if level == locking.LEVEL_NODE:
7902 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7903 if instance.disk_template in constants.DTS_EXT_MIRROR:
7904 if self.op.target_node is None:
7905 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7907 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7908 self.op.target_node]
7909 del self.recalculate_locks[locking.LEVEL_NODE]
7911 self._LockInstancesNodes()
7912 elif level == locking.LEVEL_NODE_RES:
7914 self.needed_locks[locking.LEVEL_NODE_RES] = \
7915 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7917 def BuildHooksEnv(self):
7920 This runs on master, primary and secondary nodes of the instance.
7923 instance = self._migrater.instance
7924 source_node = instance.primary_node
7925 target_node = self.op.target_node
7926 env = _BuildInstanceHookEnvByObject(self, instance)
7928 "MIGRATE_LIVE": self._migrater.live,
7929 "MIGRATE_CLEANUP": self.op.cleanup,
7930 "OLD_PRIMARY": source_node,
7931 "NEW_PRIMARY": target_node,
7932 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7935 if instance.disk_template in constants.DTS_INT_MIRROR:
7936 env["OLD_SECONDARY"] = target_node
7937 env["NEW_SECONDARY"] = source_node
7939 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7943 def BuildHooksNodes(self):
7944 """Build hooks nodes.
7947 instance = self._migrater.instance
7948 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7949 return (nl, nl + [instance.primary_node])
7952 class LUInstanceMove(LogicalUnit):
7953 """Move an instance by data-copying.
7956 HPATH = "instance-move"
7957 HTYPE = constants.HTYPE_INSTANCE
7960 def ExpandNames(self):
7961 self._ExpandAndLockInstance()
7962 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7963 self.op.target_node = target_node
7964 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7965 self.needed_locks[locking.LEVEL_NODE_RES] = []
7966 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7968 def DeclareLocks(self, level):
7969 if level == locking.LEVEL_NODE:
7970 self._LockInstancesNodes(primary_only=True)
7971 elif level == locking.LEVEL_NODE_RES:
7973 self.needed_locks[locking.LEVEL_NODE_RES] = \
7974 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7976 def BuildHooksEnv(self):
7979 This runs on master, primary and secondary nodes of the instance.
7983 "TARGET_NODE": self.op.target_node,
7984 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7986 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7989 def BuildHooksNodes(self):
7990 """Build hooks nodes.
7994 self.cfg.GetMasterNode(),
7995 self.instance.primary_node,
7996 self.op.target_node,
8000 def CheckPrereq(self):
8001 """Check prerequisites.
8003 This checks that the instance is in the cluster.
8006 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8007 assert self.instance is not None, \
8008 "Cannot retrieve locked instance %s" % self.op.instance_name
8010 node = self.cfg.GetNodeInfo(self.op.target_node)
8011 assert node is not None, \
8012 "Cannot retrieve locked node %s" % self.op.target_node
8014 self.target_node = target_node = node.name
8016 if target_node == instance.primary_node:
8017 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8018 (instance.name, target_node),
8021 bep = self.cfg.GetClusterInfo().FillBE(instance)
8023 for idx, dsk in enumerate(instance.disks):
8024 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8025 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8026 " cannot copy" % idx, errors.ECODE_STATE)
8028 _CheckNodeOnline(self, target_node)
8029 _CheckNodeNotDrained(self, target_node)
8030 _CheckNodeVmCapable(self, target_node)
8031 cluster = self.cfg.GetClusterInfo()
8032 group_info = self.cfg.GetNodeGroup(node.group)
8033 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8034 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8035 ignore=self.op.ignore_ipolicy)
8037 if instance.admin_state == constants.ADMINST_UP:
8038 # check memory requirements on the secondary node
8039 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8040 instance.name, bep[constants.BE_MAXMEM],
8041 instance.hypervisor)
8043 self.LogInfo("Not checking memory on the secondary node as"
8044 " instance will not be started")
8046     # check bridge existence
8047 _CheckInstanceBridgesExist(self, instance, node=target_node)
8049 def Exec(self, feedback_fn):
8050 """Move an instance.
8052 The move is done by shutting it down on its present node, copying
8053 the data over (slow) and starting it on the new node.
8056 instance = self.instance
8058 source_node = instance.primary_node
8059 target_node = self.target_node
8061 self.LogInfo("Shutting down instance %s on source node %s",
8062 instance.name, source_node)
8064 assert (self.owned_locks(locking.LEVEL_NODE) ==
8065 self.owned_locks(locking.LEVEL_NODE_RES))
8067 result = self.rpc.call_instance_shutdown(source_node, instance,
8068 self.op.shutdown_timeout)
8069 msg = result.fail_msg
8071 if self.op.ignore_consistency:
8072 self.proc.LogWarning("Could not shutdown instance %s on node %s."
8073 " Proceeding anyway. Please make sure node"
8074 " %s is down. Error details: %s",
8075 instance.name, source_node, source_node, msg)
8077 raise errors.OpExecError("Could not shutdown instance %s on"
8079 (instance.name, source_node, msg))
8081 # create the target disks
8083 _CreateDisks(self, instance, target_node=target_node)
8084 except errors.OpExecError:
8085 self.LogWarning("Device creation failed, reverting...")
8087 _RemoveDisks(self, instance, target_node=target_node)
8089 self.cfg.ReleaseDRBDMinors(instance.name)
8092 cluster_name = self.cfg.GetClusterInfo().cluster_name
8095 # activate, get path, copy the data over
8096 for idx, disk in enumerate(instance.disks):
8097 self.LogInfo("Copying data for disk %d", idx)
8098 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8099 instance.name, True, idx)
8101 self.LogWarning("Can't assemble newly created disk %d: %s",
8102 idx, result.fail_msg)
8103 errs.append(result.fail_msg)
8105 dev_path = result.payload
8106 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8107 target_node, dev_path,
8110 self.LogWarning("Can't copy data over for disk %d: %s",
8111 idx, result.fail_msg)
8112 errs.append(result.fail_msg)
8116 self.LogWarning("Some disks failed to copy, aborting")
8118 _RemoveDisks(self, instance, target_node=target_node)
8120 self.cfg.ReleaseDRBDMinors(instance.name)
8121 raise errors.OpExecError("Errors during disk copy: %s" %
8124 instance.primary_node = target_node
8125 self.cfg.Update(instance, feedback_fn)
8127 self.LogInfo("Removing the disks on the original node")
8128 _RemoveDisks(self, instance, target_node=source_node)
8130 # Only start the instance if it's marked as up
8131 if instance.admin_state == constants.ADMINST_UP:
8132 self.LogInfo("Starting instance %s on node %s",
8133 instance.name, target_node)
8135 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8136 ignore_secondaries=True)
8138 _ShutdownInstanceDisks(self, instance)
8139 raise errors.OpExecError("Can't activate the instance's disks")
8141 result = self.rpc.call_instance_start(target_node,
8142 (instance, None, None), False)
8143 msg = result.fail_msg
8145 _ShutdownInstanceDisks(self, instance)
8146 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8147 (instance.name, target_node, msg))
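# The data copy above proceeds one disk at a time: the freshly created disk
# is first assembled on the target node (which yields its device path) and
# the source node then exports the old disk's contents to that path. A
# rough, unused sketch of a single iteration, assuming the same two RPCs
# used above and a cluster_name obtained from the configuration:
def _ExampleCopyOneDisk(lu, instance, disk, idx, source_node, target_node,
                        cluster_name):
  """Sketch only: assemble on the target, then export from the source."""
  result = lu.rpc.call_blockdev_assemble(target_node, (disk, instance),
                                         instance.name, True, idx)
  result.Raise("Can't assemble disk %d on node %s" % (idx, target_node))
  dev_path = result.payload
  result = lu.rpc.call_blockdev_export(source_node, (disk, instance),
                                       target_node, dev_path, cluster_name)
  result.Raise("Can't copy disk %d to node %s" % (idx, target_node))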
8150 class LUNodeMigrate(LogicalUnit):
8151 """Migrate all instances from a node.
8154 HPATH = "node-migrate"
8155 HTYPE = constants.HTYPE_NODE
8158 def CheckArguments(self):
8161 def ExpandNames(self):
8162 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8164 self.share_locks = _ShareAll()
8165 self.needed_locks = {
8166 locking.LEVEL_NODE: [self.op.node_name],
8169 def BuildHooksEnv(self):
8172 This runs on the master, the primary and all the secondaries.
8176 "NODE_NAME": self.op.node_name,
8177 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8180 def BuildHooksNodes(self):
8181 """Build hooks nodes.
8184 nl = [self.cfg.GetMasterNode()]
8187 def CheckPrereq(self):
8190 def Exec(self, feedback_fn):
8191 # Prepare jobs for migration instances
8192 allow_runtime_changes = self.op.allow_runtime_changes
8194 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8197 iallocator=self.op.iallocator,
8198 target_node=self.op.target_node,
8199 allow_runtime_changes=allow_runtime_changes,
8200 ignore_ipolicy=self.op.ignore_ipolicy)]
8201 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8203 # TODO: Run iallocator in this opcode and pass correct placement options to
8204 # OpInstanceMigrate. Since other jobs can modify the cluster between
8205 # running the iallocator and the actual migration, a good consistency model
8206 # will have to be found.
8208 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8209 frozenset([self.op.node_name]))
8211 return ResultWithJobs(jobs)
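# Exec above queues one job per primary instance, each consisting of a
# single OpInstanceMigrate opcode, so the structure handed to
# ResultWithJobs is a list of one-element lists. A made-up illustration for
# two instances (parameters elided):
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)],
#   ]
#
# Submitting each inner list as its own job lets the individual migrations
# proceed (or fail) independently of one another.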
8214 class TLMigrateInstance(Tasklet):
8215 """Tasklet class for instance migration.
8218 @ivar live: whether the migration will be done live or non-live;
8219   this variable is initialized only after CheckPrereq has run
8220 @type cleanup: boolean
8221   @ivar cleanup: Whether we clean up from a failed migration
8222 @type iallocator: string
8223 @ivar iallocator: The iallocator used to determine target_node
8224 @type target_node: string
8225 @ivar target_node: If given, the target_node to reallocate the instance to
8226 @type failover: boolean
8227 @ivar failover: Whether operation results in failover or migration
8228 @type fallback: boolean
8229 @ivar fallback: Whether fallback to failover is allowed if migration not
8231 @type ignore_consistency: boolean
8232   @ivar ignore_consistency: Whether we should ignore consistency between source
8234 @type shutdown_timeout: int
8235   @ivar shutdown_timeout: In case of failover, the timeout for the instance shutdown
8236 @type ignore_ipolicy: bool
8237 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8242 _MIGRATION_POLL_INTERVAL = 1 # seconds
8243 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8245 def __init__(self, lu, instance_name, cleanup=False,
8246 failover=False, fallback=False,
8247 ignore_consistency=False,
8248 allow_runtime_changes=True,
8249 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8250 ignore_ipolicy=False):
8251 """Initializes this class.
8254 Tasklet.__init__(self, lu)
8257 self.instance_name = instance_name
8258 self.cleanup = cleanup
8259 self.live = False # will be overridden later
8260 self.failover = failover
8261 self.fallback = fallback
8262 self.ignore_consistency = ignore_consistency
8263 self.shutdown_timeout = shutdown_timeout
8264 self.ignore_ipolicy = ignore_ipolicy
8265 self.allow_runtime_changes = allow_runtime_changes
8267 def CheckPrereq(self):
8268 """Check prerequisites.
8270 This checks that the instance is in the cluster.
8273 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8274 instance = self.cfg.GetInstanceInfo(instance_name)
8275 assert instance is not None
8276 self.instance = instance
8277 cluster = self.cfg.GetClusterInfo()
8279 if (not self.cleanup and
8280 not instance.admin_state == constants.ADMINST_UP and
8281 not self.failover and self.fallback):
8282 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8283 " switching to failover")
8284 self.failover = True
8286 if instance.disk_template not in constants.DTS_MIRRORED:
8291 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8292 " %s" % (instance.disk_template, text),
8295 if instance.disk_template in constants.DTS_EXT_MIRROR:
8296 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8298 if self.lu.op.iallocator:
8299 self._RunAllocator()
8301       # We set self.target_node as it is required by
8303 self.target_node = self.lu.op.target_node
8305 # Check that the target node is correct in terms of instance policy
8306 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8307 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8308 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8310 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8311 ignore=self.ignore_ipolicy)
8313 # self.target_node is already populated, either directly or by the
8315 target_node = self.target_node
8316 if self.target_node == instance.primary_node:
8317 raise errors.OpPrereqError("Cannot migrate instance %s"
8318 " to its primary (%s)" %
8319 (instance.name, instance.primary_node),
8322 if len(self.lu.tasklets) == 1:
8323 # It is safe to release locks only when we're the only tasklet
8325 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8326 keep=[instance.primary_node, self.target_node])
8329 secondary_nodes = instance.secondary_nodes
8330 if not secondary_nodes:
8331 raise errors.ConfigurationError("No secondary node but using"
8332 " %s disk template" %
8333 instance.disk_template)
8334 target_node = secondary_nodes[0]
8335 if self.lu.op.iallocator or (self.lu.op.target_node and
8336 self.lu.op.target_node != target_node):
8338 text = "failed over"
8341 raise errors.OpPrereqError("Instances with disk template %s cannot"
8342 " be %s to arbitrary nodes"
8343 " (neither an iallocator nor a target"
8344 " node can be passed)" %
8345 (instance.disk_template, text),
8347 nodeinfo = self.cfg.GetNodeInfo(target_node)
8348 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8349 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8351 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8352 ignore=self.ignore_ipolicy)
8354 i_be = cluster.FillBE(instance)
8356 # check memory requirements on the secondary node
8357 if (not self.cleanup and
8358 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8359 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8360 "migrating instance %s" %
8362 i_be[constants.BE_MINMEM],
8363 instance.hypervisor)
8365 self.lu.LogInfo("Not checking memory on the secondary node as"
8366 " instance will not be started")
8368 # check if failover must be forced instead of migration
8369 if (not self.cleanup and not self.failover and
8370 i_be[constants.BE_ALWAYS_FAILOVER]):
8371 self.lu.LogInfo("Instance configured to always failover; fallback"
8373 self.failover = True
8376     # check bridge existence
8376 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8378 if not self.cleanup:
8379 _CheckNodeNotDrained(self.lu, target_node)
8380 if not self.failover:
8381 result = self.rpc.call_instance_migratable(instance.primary_node,
8383 if result.fail_msg and self.fallback:
8384 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8386 self.failover = True
8388 result.Raise("Can't migrate, please use failover",
8389 prereq=True, ecode=errors.ECODE_STATE)
8391 assert not (self.failover and self.cleanup)
8393 if not self.failover:
8394 if self.lu.op.live is not None and self.lu.op.mode is not None:
8395 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8396 " parameters are accepted",
8398 if self.lu.op.live is not None:
8400 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8402 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8403 # reset the 'live' parameter to None so that repeated
8404 # invocations of CheckPrereq do not raise an exception
8405 self.lu.op.live = None
8406 elif self.lu.op.mode is None:
8407 # read the default value from the hypervisor
8408 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8409 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8411 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8413 # Failover is never live
8416 if not (self.failover or self.cleanup):
8417 remote_info = self.rpc.call_instance_info(instance.primary_node,
8419 instance.hypervisor)
8420 remote_info.Raise("Error checking instance on node %s" %
8421 instance.primary_node)
8422 instance_running = bool(remote_info.payload)
8423 if instance_running:
8424 self.current_mem = int(remote_info.payload["memory"])
8426 def _RunAllocator(self):
8427 """Run the allocator based on input opcode.
8430 # FIXME: add a self.ignore_ipolicy option
8431 req = iallocator.IAReqRelocate(name=self.instance_name,
8432 relocate_from=[self.instance.primary_node])
8433 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8435 ial.Run(self.lu.op.iallocator)
8438 raise errors.OpPrereqError("Can't compute nodes using"
8439 " iallocator '%s': %s" %
8440 (self.lu.op.iallocator, ial.info),
8442 self.target_node = ial.result[0]
8443 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8444 self.instance_name, self.lu.op.iallocator,
8445 utils.CommaJoin(ial.result))
8447 def _WaitUntilSync(self):
8448 """Poll with custom rpc for disk sync.
8450 This uses our own step-based rpc call.
8453 self.feedback_fn("* wait until resync is done")
8457 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8459 (self.instance.disks,
8462 for node, nres in result.items():
8463 nres.Raise("Cannot resync disks on node %s" % node)
8464 node_done, node_percent = nres.payload
8465 all_done = all_done and node_done
8466 if node_percent is not None:
8467 min_percent = min(min_percent, node_percent)
8469 if min_percent < 100:
8470 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8473 def _EnsureSecondary(self, node):
8474 """Demote a node to secondary.
8477 self.feedback_fn("* switching node %s to secondary mode" % node)
8479 for dev in self.instance.disks:
8480 self.cfg.SetDiskID(dev, node)
8482 result = self.rpc.call_blockdev_close(node, self.instance.name,
8483 self.instance.disks)
8484 result.Raise("Cannot change disk to secondary on node %s" % node)
8486 def _GoStandalone(self):
8487 """Disconnect from the network.
8490 self.feedback_fn("* changing into standalone mode")
8491 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8492 self.instance.disks)
8493 for node, nres in result.items():
8494 nres.Raise("Cannot disconnect disks node %s" % node)
8496 def _GoReconnect(self, multimaster):
8497 """Reconnect to the network.
8503 msg = "single-master"
8504 self.feedback_fn("* changing disks into %s mode" % msg)
8505 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8506 (self.instance.disks, self.instance),
8507 self.instance.name, multimaster)
8508 for node, nres in result.items():
8509 nres.Raise("Cannot change disks config on node %s" % node)
8511 def _ExecCleanup(self):
8512 """Try to cleanup after a failed migration.
8514 The cleanup is done by:
8515 - check that the instance is running only on one node
8516 (and update the config if needed)
8517 - change disks on its secondary node to secondary
8518 - wait until disks are fully synchronized
8519 - disconnect from the network
8520 - change disks into single-master mode
8521 - wait again until disks are fully synchronized
8524 instance = self.instance
8525 target_node = self.target_node
8526 source_node = self.source_node
8528 # check running on only one node
8529 self.feedback_fn("* checking where the instance actually runs"
8530 " (if this hangs, the hypervisor might be in"
8532 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8533 for node, result in ins_l.items():
8534 result.Raise("Can't contact node %s" % node)
8536 runningon_source = instance.name in ins_l[source_node].payload
8537 runningon_target = instance.name in ins_l[target_node].payload
8539 if runningon_source and runningon_target:
8540 raise errors.OpExecError("Instance seems to be running on two nodes,"
8541 " or the hypervisor is confused; you will have"
8542 " to ensure manually that it runs only on one"
8543 " and restart this operation")
8545 if not (runningon_source or runningon_target):
8546 raise errors.OpExecError("Instance does not seem to be running at all;"
8547 " in this case it's safer to repair by"
8548 " running 'gnt-instance stop' to ensure disk"
8549 " shutdown, and then restarting it")
8551 if runningon_target:
8552 # the migration has actually succeeded, we need to update the config
8553 self.feedback_fn("* instance running on secondary node (%s),"
8554 " updating config" % target_node)
8555 instance.primary_node = target_node
8556 self.cfg.Update(instance, self.feedback_fn)
8557 demoted_node = source_node
8559 self.feedback_fn("* instance confirmed to be running on its"
8560 " primary node (%s)" % source_node)
8561 demoted_node = target_node
8563 if instance.disk_template in constants.DTS_INT_MIRROR:
8564 self._EnsureSecondary(demoted_node)
8566 self._WaitUntilSync()
8567 except errors.OpExecError:
8568       # we ignore errors here, since if the device is standalone, it
8569 # won't be able to sync
8571 self._GoStandalone()
8572 self._GoReconnect(False)
8573 self._WaitUntilSync()
8575 self.feedback_fn("* done")
8577 def _RevertDiskStatus(self):
8578 """Try to revert the disk status after a failed migration.
8581 target_node = self.target_node
8582 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8586 self._EnsureSecondary(target_node)
8587 self._GoStandalone()
8588 self._GoReconnect(False)
8589 self._WaitUntilSync()
8590 except errors.OpExecError, err:
8591 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8592 " please try to recover the instance manually;"
8593 " error '%s'" % str(err))
8595 def _AbortMigration(self):
8596 """Call the hypervisor code to abort a started migration.
8599 instance = self.instance
8600 target_node = self.target_node
8601 source_node = self.source_node
8602 migration_info = self.migration_info
8604 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8608 abort_msg = abort_result.fail_msg
8610 logging.error("Aborting migration failed on target node %s: %s",
8611 target_node, abort_msg)
8612     # Don't raise an exception here, as we still have to try to revert the
8613 # disk status, even if this step failed.
8615 abort_result = self.rpc.call_instance_finalize_migration_src(
8616 source_node, instance, False, self.live)
8617 abort_msg = abort_result.fail_msg
8619 logging.error("Aborting migration failed on source node %s: %s",
8620 source_node, abort_msg)
8622 def _ExecMigration(self):
8623 """Migrate an instance.
8625 The migrate is done by:
8626 - change the disks into dual-master mode
8627 - wait until disks are fully synchronized again
8628 - migrate the instance
8629 - change disks on the new secondary node (the old primary) to secondary
8630 - wait until disks are fully synchronized
8631 - change disks into single-master mode
8634 instance = self.instance
8635 target_node = self.target_node
8636 source_node = self.source_node
8638 # Check for hypervisor version mismatch and warn the user.
8639 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8640 None, [self.instance.hypervisor])
8641 for ninfo in nodeinfo.values():
8642 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8644 (_, _, (src_info, )) = nodeinfo[source_node].payload
8645 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8647 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8648 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8649 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8650 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8651 if src_version != dst_version:
8652 self.feedback_fn("* warning: hypervisor version mismatch between"
8653 " source (%s) and target (%s) node" %
8654 (src_version, dst_version))
8656 self.feedback_fn("* checking disk consistency between source and target")
8657 for (idx, dev) in enumerate(instance.disks):
8658 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8659 raise errors.OpExecError("Disk %s is degraded or not fully"
8660 " synchronized on target node,"
8661 " aborting migration" % idx)
8663 if self.current_mem > self.tgt_free_mem:
8664 if not self.allow_runtime_changes:
8665 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8666 " free memory to fit instance %s on target"
8667 " node %s (have %dMB, need %dMB)" %
8668 (instance.name, target_node,
8669 self.tgt_free_mem, self.current_mem))
8670 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8671 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8674 rpcres.Raise("Cannot modify instance runtime memory")
8676 # First get the migration information from the remote node
8677 result = self.rpc.call_migration_info(source_node, instance)
8678 msg = result.fail_msg
8680 log_err = ("Failed fetching source migration information from %s: %s" %
8682 logging.error(log_err)
8683 raise errors.OpExecError(log_err)
8685 self.migration_info = migration_info = result.payload
8687 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8688 # Then switch the disks to master/master mode
8689 self._EnsureSecondary(target_node)
8690 self._GoStandalone()
8691 self._GoReconnect(True)
8692 self._WaitUntilSync()
8694 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8695 result = self.rpc.call_accept_instance(target_node,
8698 self.nodes_ip[target_node])
8700 msg = result.fail_msg
8702 logging.error("Instance pre-migration failed, trying to revert"
8703 " disk status: %s", msg)
8704 self.feedback_fn("Pre-migration failed, aborting")
8705 self._AbortMigration()
8706 self._RevertDiskStatus()
8707 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8708 (instance.name, msg))
8710 self.feedback_fn("* migrating instance to %s" % target_node)
8711 result = self.rpc.call_instance_migrate(source_node, instance,
8712 self.nodes_ip[target_node],
8714 msg = result.fail_msg
8716 logging.error("Instance migration failed, trying to revert"
8717 " disk status: %s", msg)
8718 self.feedback_fn("Migration failed, aborting")
8719 self._AbortMigration()
8720 self._RevertDiskStatus()
8721 raise errors.OpExecError("Could not migrate instance %s: %s" %
8722 (instance.name, msg))
8724 self.feedback_fn("* starting memory transfer")
8725 last_feedback = time.time()
8727 result = self.rpc.call_instance_get_migration_status(source_node,
8729 msg = result.fail_msg
8730 ms = result.payload # MigrationStatus instance
8731 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8732 logging.error("Instance migration failed, trying to revert"
8733 " disk status: %s", msg)
8734 self.feedback_fn("Migration failed, aborting")
8735 self._AbortMigration()
8736 self._RevertDiskStatus()
8738 msg = "hypervisor returned failure"
8739 raise errors.OpExecError("Could not migrate instance %s: %s" %
8740 (instance.name, msg))
8742 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8743 self.feedback_fn("* memory transfer complete")
8746 if (utils.TimeoutExpired(last_feedback,
8747 self._MIGRATION_FEEDBACK_INTERVAL) and
8748 ms.transferred_ram is not None):
8749 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8750 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8751 last_feedback = time.time()
8753 time.sleep(self._MIGRATION_POLL_INTERVAL)
8755 result = self.rpc.call_instance_finalize_migration_src(source_node,
8759 msg = result.fail_msg
8761 logging.error("Instance migration succeeded, but finalization failed"
8762 " on the source node: %s", msg)
8763 raise errors.OpExecError("Could not finalize instance migration: %s" %
8766 instance.primary_node = target_node
8768 # distribute new instance config to the other nodes
8769 self.cfg.Update(instance, self.feedback_fn)
8771 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8775 msg = result.fail_msg
8777 logging.error("Instance migration succeeded, but finalization failed"
8778 " on the target node: %s", msg)
8779 raise errors.OpExecError("Could not finalize instance migration: %s" %
8782 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8783 self._EnsureSecondary(source_node)
8784 self._WaitUntilSync()
8785 self._GoStandalone()
8786 self._GoReconnect(False)
8787 self._WaitUntilSync()
8789 # If the instance's disk template is `rbd' and there was a successful
8790 # migration, unmap the device from the source node.
8791 if self.instance.disk_template == constants.DT_RBD:
8792 disks = _ExpandCheckDisks(instance, instance.disks)
8793 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8795 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8796 msg = result.fail_msg
8798 logging.error("Migration was successful, but couldn't unmap the"
8799 " block device %s on source node %s: %s",
8800 disk.iv_name, source_node, msg)
8801 logging.error("You need to unmap the device %s manually on %s",
8802 disk.iv_name, source_node)
8804 self.feedback_fn("* done")
8806 def _ExecFailover(self):
8807 """Failover an instance.
8809 The failover is done by shutting it down on its present node and
8810 starting it on the secondary.
8813 instance = self.instance
8814 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8816 source_node = instance.primary_node
8817 target_node = self.target_node
8819 if instance.admin_state == constants.ADMINST_UP:
8820 self.feedback_fn("* checking disk consistency between source and target")
8821 for (idx, dev) in enumerate(instance.disks):
8822 # for drbd, these are drbd over lvm
8823 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8825 if primary_node.offline:
8826 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8828 (primary_node.name, idx, target_node))
8829 elif not self.ignore_consistency:
8830 raise errors.OpExecError("Disk %s is degraded on target node,"
8831 " aborting failover" % idx)
8833 self.feedback_fn("* not checking disk consistency as instance is not"
8836 self.feedback_fn("* shutting down instance on source node")
8837 logging.info("Shutting down instance %s on node %s",
8838 instance.name, source_node)
8840 result = self.rpc.call_instance_shutdown(source_node, instance,
8841 self.shutdown_timeout)
8842 msg = result.fail_msg
8844 if self.ignore_consistency or primary_node.offline:
8845 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8846 " proceeding anyway; please make sure node"
8847 " %s is down; error details: %s",
8848 instance.name, source_node, source_node, msg)
8850 raise errors.OpExecError("Could not shutdown instance %s on"
8852 (instance.name, source_node, msg))
8854 self.feedback_fn("* deactivating the instance's disks on source node")
8855 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8856 raise errors.OpExecError("Can't shut down the instance's disks")
8858 instance.primary_node = target_node
8859 # distribute new instance config to the other nodes
8860 self.cfg.Update(instance, self.feedback_fn)
8862 # Only start the instance if it's marked as up
8863 if instance.admin_state == constants.ADMINST_UP:
8864 self.feedback_fn("* activating the instance's disks on target node %s" %
8866 logging.info("Starting instance %s on node %s",
8867 instance.name, target_node)
8869 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8870 ignore_secondaries=True)
8872 _ShutdownInstanceDisks(self.lu, instance)
8873 raise errors.OpExecError("Can't activate the instance's disks")
8875 self.feedback_fn("* starting the instance on the target node %s" %
8877 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8879 msg = result.fail_msg
8881 _ShutdownInstanceDisks(self.lu, instance)
8882 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8883 (instance.name, target_node, msg))
8885 def Exec(self, feedback_fn):
8886 """Perform the migration.
8889 self.feedback_fn = feedback_fn
8890 self.source_node = self.instance.primary_node
8892 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8893 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8894 self.target_node = self.instance.secondary_nodes[0]
8895 # Otherwise self.target_node has been populated either
8896 # directly, or through an iallocator.
8898 self.all_nodes = [self.source_node, self.target_node]
8899 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8900 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8903 feedback_fn("Failover instance %s" % self.instance.name)
8904 self._ExecFailover()
8906 feedback_fn("Migrating instance %s" % self.instance.name)
8909 return self._ExecCleanup()
8911 return self._ExecMigration()
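# For internally mirrored (DRBD) instances, _ExecMigration drives the disks
# through a fixed sequence of states around the actual hypervisor
# migration. The helper below is purely illustrative (it is never called)
# and only strings together the tasklet's own private steps in the order
# used above:
def _ExampleDrbdMigrationDiskSequence(tl, source_node, target_node):
  """Sketch of the disk-state sequence performed by TLMigrateInstance."""
  # pylint: disable=W0212
  # before the migration: bring both sides into dual-master mode
  tl._EnsureSecondary(target_node)
  tl._GoStandalone()
  tl._GoReconnect(True)
  tl._WaitUntilSync()
  # ... the hypervisor migration itself happens here ...
  # afterwards: demote the old primary and return to single-master mode
  tl._EnsureSecondary(source_node)
  tl._WaitUntilSync()
  tl._GoStandalone()
  tl._GoReconnect(False)
  tl._WaitUntilSync()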
8914 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8916 """Wrapper around L{_CreateBlockDevInner}.
8918 This method annotates the root device first.
8921 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8922 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8926 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8928 """Create a tree of block devices on a given node.
8930 If this device type has to be created on secondaries, create it and
8933 If not, just recurse to children keeping the same 'force' value.
8935 @attention: The device has to be annotated already.
8937 @param lu: the lu on whose behalf we execute
8938 @param node: the node on which to create the device
8939 @type instance: L{objects.Instance}
8940 @param instance: the instance which owns the device
8941 @type device: L{objects.Disk}
8942 @param device: the device to create
8943 @type force_create: boolean
8944 @param force_create: whether to force creation of this device; this
8945       will be changed to True whenever we find a device which has
8946 CreateOnSecondary() attribute
8947 @param info: the extra 'metadata' we should attach to the device
8948 (this will be represented as a LVM tag)
8949 @type force_open: boolean
8950   @param force_open: this parameter will be passed to the
8951 L{backend.BlockdevCreate} function where it specifies
8952 whether we run on primary or not, and it affects both
8953       the child assembly and the device's own Open() execution
8956 if device.CreateOnSecondary():
8960 for child in device.children:
8961 _CreateBlockDevInner(lu, node, instance, child, force_create,
8964 if not force_create:
8967 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8970 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8971 """Create a single block device on a given node.
8973 This will not recurse over children of the device, so they must be
8976 @param lu: the lu on whose behalf we execute
8977 @param node: the node on which to create the device
8978 @type instance: L{objects.Instance}
8979 @param instance: the instance which owns the device
8980 @type device: L{objects.Disk}
8981 @param device: the device to create
8982 @param info: the extra 'metadata' we should attach to the device
8983 (this will be represented as a LVM tag)
8984 @type force_open: boolean
8985   @param force_open: this parameter will be passed to the
8986 L{backend.BlockdevCreate} function where it specifies
8987 whether we run on primary or not, and it affects both
8988       the child assembly and the device's own Open() execution
8991 lu.cfg.SetDiskID(device, node)
8992 result = lu.rpc.call_blockdev_create(node, device, device.size,
8993 instance.name, force_open, info)
8994 result.Raise("Can't create block device %s on"
8995 " node %s for instance %s" % (device, node, instance.name))
8996 if device.physical_id is None:
8997 device.physical_id = result.payload
9000 def _GenerateUniqueNames(lu, exts):
9001 """Generate a suitable LV name.
9003 This will generate a logical volume name for the given instance.
9008 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9009 results.append("%s%s" % (new_id, val))
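# _GenerateUniqueNames generates a fresh unique ID for every requested
# extension and simply concatenates the two, so a call with
# [".disk0", ".disk1"] yields names of the (illustrative) form
# "d9f2ab34-....disk0" and "7c01e5d8-....disk1", where each prefix is a
# separate ID from GenerateUniqueID and the suffix is exactly the
# extension that was passed in.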
9013 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9014 iv_name, p_minor, s_minor):
9015 """Generate a drbd8 device complete with its children.
9018 assert len(vgnames) == len(names) == 2
9019 port = lu.cfg.AllocatePort()
9020 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9022 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9023 logical_id=(vgnames[0], names[0]),
9025 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9026 size=constants.DRBD_META_SIZE,
9027 logical_id=(vgnames[1], names[1]),
9029 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9030 logical_id=(primary, secondary, port,
9033 children=[dev_data, dev_meta],
9034 iv_name=iv_name, params={})
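# The branch generated above is a small disk tree: a DRBD8 device whose two
# children are the data LV (of the requested size) and a metadata LV of
# DRBD_META_SIZE, possibly in a different volume group. Schematically, with
# made-up values:
#
#   Disk(LD_DRBD8, size=10240,
#        logical_id=(primary, secondary, port, p_minor, s_minor, secret),
#        children=[Disk(LD_LV, size=10240,
#                       logical_id=("xenvg", "<name>_data")),
#                  Disk(LD_LV, size=DRBD_META_SIZE,
#                       logical_id=("xenvg", "<name>_meta"))])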
9038 _DISK_TEMPLATE_NAME_PREFIX = {
9039 constants.DT_PLAIN: "",
9040 constants.DT_RBD: ".rbd",
9044 _DISK_TEMPLATE_DEVICE_TYPE = {
9045 constants.DT_PLAIN: constants.LD_LV,
9046 constants.DT_FILE: constants.LD_FILE,
9047 constants.DT_SHARED_FILE: constants.LD_FILE,
9048 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9049 constants.DT_RBD: constants.LD_RBD,
9053 def _GenerateDiskTemplate(
9054 lu, template_name, instance_name, primary_node, secondary_nodes,
9055 disk_info, file_storage_dir, file_driver, base_index,
9056 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9057 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9058 """Generate the entire disk layout for a given template type.
9061 #TODO: compute space requirements
9063 vgname = lu.cfg.GetVGName()
9064 disk_count = len(disk_info)
9067 if template_name == constants.DT_DISKLESS:
9069 elif template_name == constants.DT_DRBD8:
9070 if len(secondary_nodes) != 1:
9071 raise errors.ProgrammerError("Wrong template configuration")
9072 remote_node = secondary_nodes[0]
9073 minors = lu.cfg.AllocateDRBDMinor(
9074 [primary_node, remote_node] * len(disk_info), instance_name)
9076 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9078 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9081 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9082 for i in range(disk_count)]):
9083 names.append(lv_prefix + "_data")
9084 names.append(lv_prefix + "_meta")
9085 for idx, disk in enumerate(disk_info):
9086 disk_index = idx + base_index
9087 data_vg = disk.get(constants.IDISK_VG, vgname)
9088 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9089 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9090 disk[constants.IDISK_SIZE],
9092 names[idx * 2:idx * 2 + 2],
9093 "disk/%d" % disk_index,
9094 minors[idx * 2], minors[idx * 2 + 1])
9095 disk_dev.mode = disk[constants.IDISK_MODE]
9096 disks.append(disk_dev)
9099 raise errors.ProgrammerError("Wrong template configuration")
9101 if template_name == constants.DT_FILE:
9103 elif template_name == constants.DT_SHARED_FILE:
9104 _req_shr_file_storage()
9106 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9107 if name_prefix is None:
9110 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9111 (name_prefix, base_index + i)
9112 for i in range(disk_count)])
9114 if template_name == constants.DT_PLAIN:
9116 def logical_id_fn(idx, _, disk):
9117 vg = disk.get(constants.IDISK_VG, vgname)
9118 return (vg, names[idx])
9120 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9122 lambda _, disk_index, disk: (file_driver,
9123 "%s/disk%d" % (file_storage_dir,
9125 elif template_name == constants.DT_BLOCK:
9127 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9128 disk[constants.IDISK_ADOPT])
9129 elif template_name == constants.DT_RBD:
9130 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9132 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9134 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9136 for idx, disk in enumerate(disk_info):
9137 disk_index = idx + base_index
9138 size = disk[constants.IDISK_SIZE]
9139 feedback_fn("* disk %s, size %s" %
9140 (disk_index, utils.FormatUnit(size, "h")))
9141 disks.append(objects.Disk(dev_type=dev_type, size=size,
9142 logical_id=logical_id_fn(idx, disk_index, disk),
9143 iv_name="disk/%d" % disk_index,
9144 mode=disk[constants.IDISK_MODE],
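# The logical_id_fn selected above determines what ends up in each disk's
# logical_id. Illustrative examples (all concrete names are made up):
#
#   DT_PLAIN:                  ("xenvg", "<uuid>.disk0")
#   DT_FILE / DT_SHARED_FILE:  (file_driver, "<file_storage_dir>/disk0")
#   DT_BLOCK:                  (BLOCKDEV_DRIVER_MANUAL, "/dev/sdb1")
#   DT_RBD:                    ("rbd", "<uuid>.rbd.disk0")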
9150 def _GetInstanceInfoText(instance):
9151 """Compute that text that should be added to the disk's metadata.
9154 return "originstname+%s" % instance.name
9157 def _CalcEta(time_taken, written, total_size):
9158 """Calculates the ETA based on size written and total size.
9160 @param time_taken: The time taken so far
9161 @param written: amount written so far
9162 @param total_size: The total size of data to be written
9163 @return: The remaining time in seconds
9166 avg_time = time_taken / float(written)
9167 return (total_size - written) * avg_time
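# A quick worked example of the formula above: if 256 MiB out of 1024 MiB
# have been written in 30 seconds, the average time per MiB is 30 / 256.0,
# so the remaining 768 MiB are expected to take
#   _CalcEta(30, 256, 1024) == (1024 - 256) * (30 / 256.0) == 90.0
# seconds.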
9170 def _WipeDisks(lu, instance, disks=None):
9171 """Wipes instance disks.
9173 @type lu: L{LogicalUnit}
9174 @param lu: the logical unit on whose behalf we execute
9175 @type instance: L{objects.Instance}
9176   @param instance: the instance whose disks we should wipe
9177 @return: the success of the wipe
9180 node = instance.primary_node
9183 disks = [(idx, disk, 0)
9184 for (idx, disk) in enumerate(instance.disks)]
9186 for (_, device, _) in disks:
9187 lu.cfg.SetDiskID(device, node)
9189 logging.info("Pausing synchronization of disks of instance '%s'",
9191 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9192 (map(compat.snd, disks),
9195 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9197 for idx, success in enumerate(result.payload):
9199 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9200 " failed", idx, instance.name)
9203 for (idx, device, offset) in disks:
9204     # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but at
9205     # most MAX_WIPE_CHUNK. Truncating to integer to avoid rounding errors.
9207 int(min(constants.MAX_WIPE_CHUNK,
9208 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9212 start_time = time.time()
9217 info_text = (" (from %s to %s)" %
9218 (utils.FormatUnit(offset, "h"),
9219 utils.FormatUnit(size, "h")))
9221 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9223 logging.info("Wiping disk %d for instance %s on node %s using"
9224 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9226 while offset < size:
9227 wipe_size = min(wipe_chunk_size, size - offset)
9229 logging.debug("Wiping disk %d, offset %s, chunk %s",
9230 idx, offset, wipe_size)
9232 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9234 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9235 (idx, offset, wipe_size))
9239 if now - last_output >= 60:
9240 eta = _CalcEta(now - start_time, offset, size)
9241 lu.LogInfo(" - done: %.1f%% ETA: %s",
9242 offset / float(size) * 100, utils.FormatSeconds(eta))
9245 logging.info("Resuming synchronization of disks for instance '%s'",
9248 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9249 (map(compat.snd, disks),
9254 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9255 node, result.fail_msg)
9257 for idx, success in enumerate(result.payload):
9259 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9260 " failed", idx, instance.name)
9263 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9264 """Create all disks for an instance.
9266 This abstracts away some work from AddInstance.
9268 @type lu: L{LogicalUnit}
9269 @param lu: the logical unit on whose behalf we execute
9270 @type instance: L{objects.Instance}
9271 @param instance: the instance whose disks we should create
9273 @param to_skip: list of indices to skip
9274 @type target_node: string
9275 @param target_node: if passed, overrides the target node for creation
9277 @return: the success of the creation
9280 info = _GetInstanceInfoText(instance)
9281 if target_node is None:
9282 pnode = instance.primary_node
9283 all_nodes = instance.all_nodes
9288 if instance.disk_template in constants.DTS_FILEBASED:
9289 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9290 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9292 result.Raise("Failed to create directory '%s' on"
9293 " node %s" % (file_storage_dir, pnode))
9295 # Note: this needs to be kept in sync with adding of disks in
9296 # LUInstanceSetParams
9297 for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
9300 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9302 for node in all_nodes:
9303 f_create = node == pnode
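      # Editorial note: f_create is True only on the instance's primary node
      # and is passed below both as the "create" flag and as the force_open
      # argument of _CreateBlockDev.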
9304 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9307 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9308 """Remove all disks for an instance.
9310 This abstracts away some work from `AddInstance()` and
9311 `RemoveInstance()`. Note that in case some of the devices couldn't
9312 be removed, the removal will continue with the other ones (compare
9313 with `_CreateDisks()`).
9315 @type lu: L{LogicalUnit}
9316 @param lu: the logical unit on whose behalf we execute
9317 @type instance: L{objects.Instance}
9318 @param instance: the instance whose disks we should remove
9319 @type target_node: string
9320 @param target_node: used to override the node on which to remove the disks
9322 @return: the success of the removal
9325 logging.info("Removing block devices for instance %s", instance.name)
  all_result = True
  ports_to_release = set()
9329 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9330 for (idx, device) in enumerate(anno_disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
9335 for node, disk in edata:
9336 lu.cfg.SetDiskID(disk, node)
      result = lu.rpc.call_blockdev_remove(node, disk)
      if result.fail_msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, result.fail_msg)
        if not (result.offline and node != instance.primary_node):
          all_result = False
9344 # if this is a DRBD disk, return its port to the pool
9345 if device.dev_type in constants.LDS_DRBD:
9346 ports_to_release.add(device.logical_id[2])
9348 if all_result or ignore_failures:
9349 for port in ports_to_release:
9350 lu.cfg.AddTcpUdpPort(port)
9352 if instance.disk_template in constants.DTS_FILEBASED:
9353 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)

  return all_result
9367 def _ComputeDiskSizePerVG(disk_template, disks):
9368 """Compute disk size requirements in the volume group
9371 def _compute(disks, payload):
9372 """Universal algorithm.
9377 vgs[disk[constants.IDISK_VG]] = \
9378 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9382 # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    }
9392 if disk_template not in req_size_dict:
9393 raise errors.ProgrammerError("Disk template '%s' size requirement"
9394 " is unknown" % disk_template)
9396 return req_size_dict[disk_template]
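# Illustrative example (editorial, not from the original source): for two
# 1024 MiB plain LVM disks in volume group "xenvg", _ComputeDiskSizePerVG
# returns {"xenvg": 2048} for DT_PLAIN; DT_DRBD8 additionally adds
# constants.DRBD_META_SIZE per disk.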
9399 def _FilterVmNodes(lu, nodenames):
9400 """Filters out non-vm_capable nodes from a list.
9402 @type lu: L{LogicalUnit}
9403 @param lu: the logical unit for which we check
9404 @type nodenames: list
9405 @param nodenames: the list of nodes on which we should check
9407 @return: the list of vm-capable nodes
9410 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9411 return [name for name in nodenames if name not in vm_nodes]
9414 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9415 """Hypervisor parameter validation.
9417 This function abstract the hypervisor parameter validation to be
9418 used in both instance create and instance modify.
9420 @type lu: L{LogicalUnit}
9421 @param lu: the logical unit for which we check
9422 @type nodenames: list
9423 @param nodenames: the list of nodes on which we should check
9424 @type hvname: string
9425 @param hvname: the name of the hypervisor we should use
9426 @type hvparams: dict
9427 @param hvparams: the parameters which we need to check
9428 @raise errors.OpPrereqError: if the parameters are not valid
9431 nodenames = _FilterVmNodes(lu, nodenames)
9433 cluster = lu.cfg.GetClusterInfo()
9434 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
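  # Editorial note: objects.FillDict overlays the opcode-level hvparams on top
  # of the cluster-level defaults for this hypervisor, so an explicit opcode
  # value always wins over the cluster default before validation.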
9436 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
9444 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9445 """OS parameters validation.
9447 @type lu: L{LogicalUnit}
9448 @param lu: the logical unit for which we check
9449 @type required: boolean
  @param required: whether the validation should fail if the OS is not found
9452 @type nodenames: list
9453 @param nodenames: the list of nodes on which we should check
9454 @type osname: string
  @param osname: the name of the OS we should use
9456 @type osparams: dict
9457 @param osparams: the parameters which we need to check
9458 @raise errors.OpPrereqError: if the parameters are not valid
9461 nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
9465 for node, nres in result.items():
9466 # we don't check for offline cases since this should be run only
9467 # against the master node and/or an instance's nodes
9468 nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
9474 def _CreateInstanceAllocRequest(op, disks, nics, beparams):
9475 """Wrapper around IAReqInstanceAlloc.
9477 @param op: The instance opcode
9478 @param disks: The computed disks
9479 @param nics: The computed nics
9480 @param beparams: The full filled beparams
9482 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9485 spindle_use = beparams[constants.BE_SPINDLE_USE]
  return iallocator.IAReqInstanceAlloc(name=op.instance_name,
                                       disk_template=op.disk_template,
                                       tags=op.tags,
                                       os=op.os_type,
                                       vcpus=beparams[constants.BE_VCPUS],
                                       memory=beparams[constants.BE_MAXMEM],
                                       spindle_use=spindle_use,
                                       disks=disks,
                                       nics=[n.ToDict() for n in nics],
                                       hypervisor=op.hypervisor)
9498 def _ComputeNics(op, cluster, default_ip, cfg, proc):
9499 """Computes the nics.
9501 @param op: The instance opcode
9502 @param cluster: Cluster configuration object
9503 @param default_ip: The default ip to assign
9504 @param cfg: An instance of the configuration object
9505 @param proc: The executer instance
  @returns: The built up NICs

  """
  nics = []
  for nic in op.nics:
    nic_mode_req = nic.get(constants.INIC_MODE, None)
9513 nic_mode = nic_mode_req
9514 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9515 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9517 net = nic.get(constants.INIC_NETWORK, None)
9518 link = nic.get(constants.NIC_LINK, None)
9519 ip = nic.get(constants.INIC_IP, None)
    if net is None or net.lower() == constants.VALUE_NONE:
      net = None
    else:
      if nic_mode_req is not None or link is not None:
        raise errors.OpPrereqError("If network is given, no mode or link"
                                   " is allowed to be passed",
                                   errors.ECODE_INVAL)
9529 # ip validity checks
    if ip is None or ip.lower() == constants.VALUE_NONE:
      nic_ip = None
    elif ip.lower() == constants.VALUE_AUTO:
      if not op.name_check:
        raise errors.OpPrereqError("IP address set to auto but name checks"
                                   " have been skipped",
                                   errors.ECODE_INVAL)
      nic_ip = default_ip
    else:
      # We defer pool operations until later, so that the iallocator has
      # filled in the instance's node(s)
      if ip.lower() == constants.NIC_IP_POOL:
        if net is None:
          raise errors.OpPrereqError("if ip=pool, parameter network"
                                     " must be passed too",
                                     errors.ECODE_INVAL)

      elif not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                   errors.ECODE_INVAL)

      nic_ip = ip
9553 # TODO: check the ip address for uniqueness
    if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
      raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                 errors.ECODE_INVAL)
9558 # MAC address verification
9559 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9560 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      mac = utils.NormalizeAndValidateMac(mac)

      try:
        # TODO: We need to factor this out
        cfg.ReserveMAC(mac, proc.GetECId())
9566 except errors.ReservationError:
9567 raise errors.OpPrereqError("MAC address %s already in use"
9568 " in cluster" % mac,
9569 errors.ECODE_NOTUNIQUE)
9571 # Build nic parameters
    nicparams = {}
    if nic_mode_req:
      nicparams[constants.NIC_MODE] = nic_mode
    if link:
      nicparams[constants.NIC_LINK] = link

    check_params = cluster.SimpleFillNIC(nicparams)
    objects.NIC.CheckParameterSyntax(check_params)
    nics.append(objects.NIC(mac=mac, ip=nic_ip,
                            network=net, nicparams=nicparams))

  return nics
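# Illustrative example (editorial, not from the original source): a request
# NIC of {"network": "net1", "ip": "pool"} becomes an objects.NIC whose MAC is
# still "auto" (it is generated later in CheckPrereq), whose nicparams come
# from the cluster defaults, and whose IP is only reserved from "net1"'s pool
# once the node is known (see the deferral comment above).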
9586 def _ComputeDisks(op, default_vg):
9587 """Computes the instance disks.
9589 @param op: The instance opcode
9590 @param default_vg: The default_vg to assume
  @return: The computed disks

  """
  disks = []
  for disk in op.disks:
9597 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9598 if mode not in constants.DISK_ACCESS_SET:
9599 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9600 mode, errors.ECODE_INVAL)
    size = disk.get(constants.IDISK_SIZE, None)
    if size is None:
      raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
    try:
      size = int(size)
    except (TypeError, ValueError):
      raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                 errors.ECODE_INVAL)
9610 data_vg = disk.get(constants.IDISK_VG, default_vg)
    new_disk = {
      constants.IDISK_SIZE: size,
      constants.IDISK_MODE: mode,
      constants.IDISK_VG: data_vg,
      }
9616 if constants.IDISK_METAVG in disk:
9617 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9618 if constants.IDISK_ADOPT in disk:
9619 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
    disks.append(new_disk)

  return disks
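# Illustrative example (editorial, not from the original source): a request
# disk of {"size": 10240, "mode": "rw"} with a default VG of "xenvg" becomes
# {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw",
#  constants.IDISK_VG: "xenvg"}.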
9625 def _ComputeFullBeParams(op, cluster):
9626 """Computes the full beparams.
9628 @param op: The instance opcode
9629 @param cluster: The cluster config object
9631 @return: The fully filled beparams
9634 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9635 for param, value in op.beparams.iteritems():
9636 if value == constants.VALUE_AUTO:
9637 op.beparams[param] = default_beparams[param]
9638 objects.UpgradeBeParams(op.beparams)
9639 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9640 return cluster.SimpleFillBE(op.beparams)
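# Illustrative example (editorial, not from the original source): with cluster
# defaults of {"maxmem": 256, "minmem": 256, "vcpus": 1} and opcode beparams of
# {"vcpus": 4, "maxmem": "auto"}, the "auto" value is first replaced by the
# cluster default and the final result is the defaults with vcpus overridden:
# {"maxmem": 256, "minmem": 256, "vcpus": 4, ...}.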
9643 class LUInstanceCreate(LogicalUnit):
9644 """Create an instance.
9647 HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
9651 def CheckArguments(self):
9655 # do not require name_check to ease forward/backward compatibility
9657 if self.op.no_install and self.op.start:
9658 self.LogInfo("No-installation mode selected, disabling startup")
9659 self.op.start = False
9660 # validate/normalize the instance name
9661 self.op.instance_name = \
9662 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9664 if self.op.ip_check and not self.op.name_check:
9665 # TODO: make the ip check more flexible and not depend on the name check
9666 raise errors.OpPrereqError("Cannot do IP address check without a name"
9667 " check", errors.ECODE_INVAL)
9669 # check nics' parameter names
9670 for nic in self.op.nics:
9671 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9673 # check disks. parameter names and consistent adopt/no-adopt strategy
9674 has_adopt = has_no_adopt = False
9675 for disk in self.op.disks:
9676 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
9685 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9686 raise errors.OpPrereqError("Disk adoption is not supported for the"
9687 " '%s' disk template" %
9688 self.op.disk_template,
9690 if self.op.iallocator is not None:
9691 raise errors.OpPrereqError("Disk adoption not allowed with an"
9692 " iallocator script", errors.ECODE_INVAL)
9693 if self.op.mode == constants.INSTANCE_IMPORT:
9694 raise errors.OpPrereqError("Disk adoption not allowed for"
9695 " instance import", errors.ECODE_INVAL)
9697 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9698 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9699 " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
9703 self.adopt_disks = has_adopt
9705 # instance name verification
9706 if self.op.name_check:
9707 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9708 self.op.instance_name = self.hostname1.name
9709 # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None
9714 # file storage checks
9715 if (self.op.file_driver and
9716 not self.op.file_driver in constants.FILE_DRIVER):
9717 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9718 self.op.file_driver, errors.ECODE_INVAL)
9720 if self.op.disk_template == constants.DT_FILE:
9721 opcodes.RequireFileStorage()
9722 elif self.op.disk_template == constants.DT_SHARED_FILE:
9723 opcodes.RequireSharedFileStorage()
9725 ### Node/iallocator related checks
9726 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9728 if self.op.pnode is not None:
9729 if self.op.disk_template in constants.DTS_INT_MIRROR:
9730 if self.op.snode is None:
9731 raise errors.OpPrereqError("The networked disk templates need"
9732 " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None
9738 self._cds = _GetClusterDomainSecret()
9740 if self.op.mode == constants.INSTANCE_IMPORT:
9741 # On import force_variant must be True, because if we forced it at
9742 # initial install, our only chance when importing it back is that it
9744 self.op.force_variant = True
9746 if self.op.no_install:
9747 self.LogInfo("No-installation mode has no effect during import")
9749 elif self.op.mode == constants.INSTANCE_CREATE:
9750 if self.op.os_type is None:
9751 raise errors.OpPrereqError("No guest OS specified",
9753 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9754 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9755 " installation" % self.op.os_type,
9757 if self.op.disk_template is None:
9758 raise errors.OpPrereqError("No disk template specified",
9761 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9762 # Check handshake to ensure both clusters have the same domain secret
9763 src_handshake = self.op.source_handshake
9764 if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)
9774 # Load and check source CA
9775 self.source_x509_ca_pem = self.op.source_x509_ca
9776 if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)
9787 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9788 if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)
9792 self.source_x509_ca = cert
9794 src_instance_name = self.op.source_instance_name
9795 if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)
9799 self.source_instance_name = \
9800 netutils.GetHostname(name=src_instance_name).name
    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
9804 self.op.mode, errors.ECODE_INVAL)
9806 def ExpandNames(self):
9807 """ExpandNames for CreateInstance.
9809 Figure out the right locks for instance creation.
9812 self.needed_locks = {}
9814 instance_name = self.op.instance_name
9815 # this is just a preventive check, but someone might still add this
9816 # instance in the meantime, and creation will fail at lock-add time
9817 if instance_name in self.cfg.GetInstanceList():
9818 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9819 instance_name, errors.ECODE_EXISTS)
9821 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9823 if self.op.iallocator:
9824 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9831 nodelist = [self.op.pnode]
9832 if self.op.snode is not None:
9833 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9834 nodelist.append(self.op.snode)
9835 self.needed_locks[locking.LEVEL_NODE] = nodelist
9836 # Lock resources of instance's primary and secondary nodes (copy to
9837 # prevent accidential modification)
9838 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9840 # in case of import lock the source node too
9841 if self.op.mode == constants.INSTANCE_IMPORT:
9842 src_node = self.op.src_node
9843 src_path = self.op.src_path
9845 if src_path is None:
9846 self.op.src_path = src_path = self.op.instance_name
9848 if src_node is None:
9849 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9850 self.op.src_node = None
9851 if os.path.isabs(src_path):
9852 raise errors.OpPrereqError("Importing an instance from a path"
9853 " requires a source node option",
9856 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9857 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9858 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9859 if not os.path.isabs(src_path):
9860 self.op.src_path = src_path = \
9861 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9863 def _RunAllocator(self):
9864 """Run the allocator based on input opcode.
9867 #TODO Export network to iallocator so that it chooses a pnode
9868 # in a nodegroup that has the desired network connected to
9869 req = _CreateInstanceAllocRequest(self.op, self.disks,
9870 self.nics, self.be_full)
9871 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.op.pnode = ial.result[0]
9881 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9882 self.op.instance_name, self.op.iallocator,
9883 utils.CommaJoin(ial.result))
9885 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9887 if req.RequiredNodes() == 2:
9888 self.op.snode = ial.result[1]
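    # Editorial note: for mirrored disk templates the iallocator returns two
    # node names (e.g. ["node1.example.com", "node2.example.com"]), which
    # become the primary and secondary node; non-mirrored templates get a
    # single node.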
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
9899 if self.op.mode == constants.INSTANCE_IMPORT:
9900 env["SRC_NODE"] = self.op.src_node
9901 env["SRC_PATH"] = self.op.src_path
9902 env["SRC_IMAGES"] = self.src_images
9904 env.update(_BuildInstanceHookEnv(
9905 name=self.op.instance_name,
9906 primary_node=self.op.pnode,
9907 secondary_nodes=self.secondaries,
9908 status=self.op.start,
9909 os_type=self.op.os_type,
9910 minmem=self.be_full[constants.BE_MINMEM],
9911 maxmem=self.be_full[constants.BE_MAXMEM],
9912 vcpus=self.be_full[constants.BE_VCPUS],
9913 nics=_NICListToTuple(self, self.nics),
9914 disk_template=self.op.disk_template,
9915 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
      ))

    return env
9925 def BuildHooksNodes(self):
9926 """Build hooks nodes.
9929 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9932 def _ReadExportInfo(self):
9933 """Reads the export information from disk.
9935 It will override the opcode source node and path with the actual
9936 information, if these two were not specified before.
9938 @return: the export information
9941 assert self.op.mode == constants.INSTANCE_IMPORT
9943 src_node = self.op.src_node
9944 src_path = self.op.src_path
9946 if src_node is None:
9947 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
9961 src_path, errors.ECODE_INVAL)
9963 _CheckNodeOnline(self, src_node)
9964 result = self.rpc.call_export_info(src_node, src_path)
9965 result.Raise("No export or invalid export found in dir %s" % src_path)
9967 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9968 if not export_info.has_section(constants.INISECT_EXP):
9969 raise errors.ProgrammerError("Corrupted export config",
9970 errors.ECODE_ENVIRON)
9972 ei_version = export_info.get(constants.INISECT_EXP, "version")
9973 if (int(ei_version) != constants.EXPORT_VERSION):
9974 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9975 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)

    return export_info
9979 def _ReadExportParams(self, einfo):
9980 """Use export parameters as defaults.
9982 In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9989 if self.op.disk_template is None:
9990 if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" %
                                     " ".join(constants.DISK_TEMPLATES),
                                     errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
10001 " is missing the disk_template information",
10002 errors.ECODE_INVAL)
    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
10007 for idx in range(constants.MAX_DISKS):
10008 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10009 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10010 disks.append({constants.IDISK_SIZE: disk_sz})
10011 self.op.disks = disks
10012 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10013 raise errors.OpPrereqError("No disk info specified and the export"
10014 " is missing the disk information",
10015 errors.ECODE_INVAL)
    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics
10030 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10031 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10033 if (self.op.hypervisor is None and
10034 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10035 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10037 if einfo.has_section(constants.INISECT_HYP):
10038 # use the export parameters but do not override the ones
10039 # specified by the user
10040 for name, value in einfo.items(constants.INISECT_HYP):
10041 if name not in self.op.hvparams:
10042 self.op.hvparams[name] = value
10044 if einfo.has_section(constants.INISECT_BEP):
10045 # use the parameters, without overriding
10046 for name, value in einfo.items(constants.INISECT_BEP):
10047 if name not in self.op.beparams:
10048 self.op.beparams[name] = value
10049 # Compatibility for the old "memory" be param
10050 if name == constants.BE_MEMORY:
10051 if constants.BE_MAXMEM not in self.op.beparams:
10052 self.op.beparams[constants.BE_MAXMEM] = value
10053 if constants.BE_MINMEM not in self.op.beparams:
10054 self.op.beparams[constants.BE_MINMEM] = value
10056 # try to read the parameters old style, from the main section
10057 for name in constants.BES_PARAMETERS:
10058 if (name not in self.op.beparams and
10059 einfo.has_option(constants.INISECT_INS, name)):
10060 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10062 if einfo.has_section(constants.INISECT_OSP):
10063 # use the parameters, without overriding
10064 for name, value in einfo.items(constants.INISECT_OSP):
10065 if name not in self.op.osparams:
10066 self.op.osparams[name] = value
10068 def _RevertToDefaults(self, cluster):
10069 """Revert the instance parameters to the default values.
10073 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10074 for name in self.op.hvparams.keys():
10075 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10076 del self.op.hvparams[name]
10078 be_defs = cluster.SimpleFillBE({})
10079 for name in self.op.beparams.keys():
10080 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10081 del self.op.beparams[name]
10083 nic_defs = cluster.SimpleFillNIC({})
10084 for nic in self.op.nics:
10085 for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
10089 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10090 for name in self.op.osparams.keys():
10091 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10092 del self.op.osparams[name]
10094 def _CalculateFileStorageDir(self):
10095 """Calculate final instance file storage dir.
10098 # file storage dir calculation/check
10099 self.instance_file_storage_dir = None
10100 if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir
10109 cfg_storagedir = get_fsd_fn()
10110 if not cfg_storagedir:
10111 raise errors.OpPrereqError("Cluster file storage dir not defined",
10112 errors.ECODE_STATE)
10113 joinargs.append(cfg_storagedir)
10115 if self.op.file_storage_dir is not None:
10116 joinargs.append(self.op.file_storage_dir)
10118 joinargs.append(self.op.instance_name)
10120 # pylint: disable=W0142
10121 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10123 def CheckPrereq(self): # pylint: disable=R0914
10124 """Check prerequisites.
10127 self._CalculateFileStorageDir()
10129 if self.op.mode == constants.INSTANCE_IMPORT:
10130 export_info = self._ReadExportInfo()
10131 self._ReadExportParams(export_info)
      self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
    else:
      self._old_instance_name = None
10136 if (not self.cfg.GetVGName() and
10137 self.op.disk_template not in constants.DTS_NOT_LVM):
10138 raise errors.OpPrereqError("Cluster does not support lvm-based"
10139 " instances", errors.ECODE_STATE)
10141 if (self.op.hypervisor is None or
10142 self.op.hypervisor == constants.VALUE_AUTO):
10143 self.op.hypervisor = self.cfg.GetHypervisorType()
10145 cluster = self.cfg.GetClusterInfo()
10146 enabled_hvs = cluster.enabled_hypervisors
10147 if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" %
                                 (self.op.hypervisor, ",".join(enabled_hvs)),
10151 errors.ECODE_STATE)
10153 # Check tag validity
10154 for tag in self.op.tags:
10155 objects.TaggableObject.ValidateTag(tag)
10157 # check hypervisor parameter syntax (locally)
10158 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
10161 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10162 hv_type.CheckParameterSyntax(filled_hvp)
10163 self.hv_full = filled_hvp
10164 # check that we don't specify global parameters on an instance
10165 _CheckGlobalHvParams(self.op.hvparams)
10167 # fill and remember the beparams dict
10168 self.be_full = _ComputeFullBeParams(self.op, cluster)
10170 # build os parameters
10171 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
10175 if self.op.identify_defaults:
10176 self._RevertToDefaults(cluster)
    # NIC buildup
    self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
                             self.proc)
10182 # disk checks/pre-build
10183 default_vg = self.cfg.GetVGName()
10184 self.disks = _ComputeDisks(self.op, default_vg)
    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)
10198 self.src_images = disk_images
10200 if self.op.instance_name == self._old_instance_name:
10201 for idx, nic in enumerate(self.nics):
10202 if nic.mac == constants.VALUE_AUTO:
10203 nic_mac_ini = "nic%d_mac" % idx
10204 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10206 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10208 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10209 if self.op.ip_check:
10210 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10211 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10212 (self.check_ip, self.op.instance_name),
10213 errors.ECODE_NOTUNIQUE)
10215 #### mac address generation
10216 # By generating here the mac address both the allocator and the hooks get
10217 # the real final mac address rather than the 'auto' or 'generate' value.
10218 # There is a race condition between the generation and the instance object
10219 # creation, which means that we know the mac is valid now, but we're not
10220 # sure it will be when we actually add the instance. If things go bad
10221 # adding the instance will abort because of a duplicate mac, and the
10222 # creation job will fail.
10223 for nic in self.nics:
10224 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10225 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10229 if self.op.iallocator is not None:
10230 self._RunAllocator()
10232 # Release all unneeded node locks
10233 _ReleaseLocks(self, locking.LEVEL_NODE,
10234 keep=filter(None, [self.op.pnode, self.op.snode,
10235 self.op.src_node]))
10236 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10237 keep=filter(None, [self.op.pnode, self.op.snode,
10238 self.op.src_node]))
10240 #### node related checks
10242 # check primary node
10243 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10244 assert self.pnode is not None, \
10245 "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
10252 if not pnode.vm_capable:
10253 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10254 " '%s'" % pnode.name, errors.ECODE_STATE)
10256 self.secondaries = []
10258 # Fill in any IPs from IP pools. This must happen here, because we need to
10259 # know the nic's primary node, as specified by the iallocator
    for idx, nic in enumerate(self.nics):
      net = nic.network
      if net is not None:
10263 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10264 if netparams is None:
10265 raise errors.OpPrereqError("No netparams found for network"
10266 " %s. Propably not connected to"
10267 " node's %s nodegroup" %
10268 (net, self.pnode.name),
10269 errors.ECODE_INVAL)
10270 self.LogInfo("NIC/%d inherits netparams %s" %
10271 (idx, netparams.values()))
10272 nic.nicparams = dict(netparams)
        if nic.ip is not None:
          if nic.ip.lower() == constants.NIC_IP_POOL:
            try:
              nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
                                         " from the address pool" % idx,
                                         errors.ECODE_STATE)
            self.LogInfo("Chose IP %s from network %s", nic.ip, net)
          else:
            try:
              self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("IP address %s already in use"
                                         " or does not belong to network %s" %
                                         (nic.ip, net),
                                         errors.ECODE_NOTUNIQUE)
      else:
        # net is None, ip None or given
10292 if self.op.conflicts_check:
10293 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10295 # mirror node verification
10296 if self.op.disk_template in constants.DTS_INT_MIRROR:
10297 if self.op.snode == pnode.name:
10298 raise errors.OpPrereqError("The secondary node cannot be the"
10299 " primary node", errors.ECODE_INVAL)
10300 _CheckNodeOnline(self, self.op.snode)
10301 _CheckNodeNotDrained(self, self.op.snode)
10302 _CheckNodeVmCapable(self, self.op.snode)
10303 self.secondaries.append(self.op.snode)
10305 snode = self.cfg.GetNodeInfo(self.op.snode)
10306 if pnode.group != snode.group:
10307 self.LogWarning("The primary and secondary nodes are in two"
10308 " different node groups; the disk parameters"
10309 " from the first disk's node group will be"
10312 nodenames = [pnode.name] + self.secondaries
10314 # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,
      }
10325 group_info = self.cfg.GetNodeGroup(pnode.group)
10326 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10327 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10328 if not self.op.ignore_ipolicy and res:
10329 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10330 (pnode.group, group_info.name, utils.CommaJoin(res)))
10331 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10333 if not self.adopt_disks:
10334 if self.op.disk_template == constants.DT_RBD:
10335 # _CheckRADOSFreeSpace() is just a placeholder.
10336 # Any function that checks prerequisites can be placed here.
10337 # Check if there is enough space on the RADOS cluster.
10338 _CheckRADOSFreeSpace()
      else:
        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10344 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10345 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10346 disk[constants.IDISK_ADOPT])
10347 for disk in self.disks])
10348 if len(all_lvs) != len(self.disks):
10349 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10350 errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10356 except errors.ReservationError:
10357 raise errors.OpPrereqError("LV named %s used by another instance" %
10358 lv_name, errors.ECODE_NOTUNIQUE)
10360 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10361 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10363 node_lvs = self.rpc.call_lv_list([pnode.name],
10364 vg_names.payload.keys())[pnode.name]
10365 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10366 node_lvs = node_lvs.payload
10368 delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
10371 utils.CommaJoin(delta),
10372 errors.ECODE_INVAL)
10373 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
10376 " adopt: %s" % utils.CommaJoin(online_lvs),
10377 errors.ECODE_STATE)
10378 # update the size of disk based on what is found
10379 for dsk in self.disks:
10380 dsk[constants.IDISK_SIZE] = \
10381 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10382 dsk[constants.IDISK_ADOPT])][0]))
10384 elif self.op.disk_template == constants.DT_BLOCK:
10385 # Normalize and de-duplicate device paths
10386 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10387 for disk in self.disks])
10388 if len(all_disks) != len(self.disks):
10389 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10390 errors.ECODE_INVAL)
10391 baddisks = [d for d in all_disks
10392 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10395 " cannot be adopted" %
10396 (", ".join(baddisks),
10397 constants.ADOPTABLE_BLOCKDEV_ROOT),
10398 errors.ECODE_INVAL)
10400 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10401 list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
10404 node_disks = node_disks.payload
10405 delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
10408 utils.CommaJoin(delta),
10409 errors.ECODE_INVAL)
10410 for dsk in self.disks:
10411 dsk[constants.IDISK_SIZE] = \
10412 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10414 # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
                                  for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,
      }
10426 group_info = self.cfg.GetNodeGroup(pnode.group)
10427 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10428 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10429 if not self.op.ignore_ipolicy and res:
10430 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10431 " policy: %s") % (pnode.group,
10432 utils.CommaJoin(res)),
10433 errors.ECODE_INVAL)
10435 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10437 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10438 # check OS parameters (remotely)
10439 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10441 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10443 # memory check on primary node
10444 #TODO(dynmem): use MINMEM for checking
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)
10451 self.dry_run_result = list(nodenames)
10453 def Exec(self, feedback_fn):
10454 """Create and add the instance to the cluster.
10457 instance = self.op.instance_name
10458 pnode_name = self.pnode.name
10460 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10461 self.owned_locks(locking.LEVEL_NODE)), \
10462 "Node locks differ from node resource locks"
10464 ht_kind = self.op.hypervisor
10465 if ht_kind in constants.HTS_REQ_PORT:
10466 network_port = self.cfg.AllocatePort()
10468 network_port = None
10470 # This is ugly but we got a chicken-egg problem here
10471 # We can only take the group disk parameters, as the instance
10472 # has no disks yet (we are generating them right here).
10473 node = self.cfg.GetNodeInfo(pnode_name)
10474 nodegroup = self.cfg.GetNodeGroup(node.group)
10475 disks = _GenerateDiskTemplate(self,
10476 self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn,
                                  self.cfg.GetGroupDiskParams(nodegroup))
10486 iobj = objects.Instance(name=instance, os=self.op.os_type,
10487 primary_node=pnode_name,
10488 nics=self.nics, disks=disks,
10489 disk_template=self.op.disk_template,
10490 admin_state=constants.ADMINST_DOWN,
10491 network_port=network_port,
10492 beparams=self.op.beparams,
10493 hvparams=self.op.hvparams,
10494 hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)
10502 if self.adopt_disks:
10503 if self.op.disk_template == constants.DT_PLAIN:
10504 # rename LVs to the newly-generated names; we need to construct
10505 # 'fake' LV disks with the old data, plus the new unique_id
10506 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10509 rename_to.append(t_dsk.logical_id)
10510 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10511 self.cfg.SetDiskID(t_dsk, pnode_name)
10512 result = self.rpc.call_blockdev_rename(pnode_name,
10513 zip(tmp_disks, rename_to))
10514 result.Raise("Failed to rename adoped LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise
10527 feedback_fn("adding instance %s to cluster config" % instance)
10529 self.cfg.AddInstance(iobj, self.proc.GetECId())
10531 # Declare that we don't want to remove the instance lock anymore, as we've
10532 # added the instance to the config
10533 del self.remove_locks[locking.LEVEL_INSTANCE]
10535 if self.op.mode == constants.INSTANCE_IMPORT:
10536 # Release unused nodes
10537 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10539 # Release all nodes
10540 _ReleaseLocks(self, locking.LEVEL_NODE)
    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
10555 elif self.op.wait_for_sync:
10556 disk_abort = not _WaitForSync(self, iobj)
10557 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10558 # make sure the disks are not degraded (still sync-ing is ok)
10559 feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
10566 self.cfg.RemoveInstance(iobj.name)
10567 # Make sure the instance lock gets removed
10568 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")
10572 # Release all node resource locks
10573 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10575 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10576 # we need to set the disks ID to the primary node, since the
10577 # preceding code might or might have not done it, depending on
10578 # disk template and other options
10579 for disk in iobj.disks:
10580 self.cfg.SetDiskID(disk, pnode_name)
10581 if self.op.mode == constants.INSTANCE_CREATE:
10582 if not self.op.no_install:
10583 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10584 not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              (iobj.disks,
                                                               iobj), True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              (iobj.disks,
                                                               iobj), False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))
      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)
        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
10636 if not compat.all(import_result):
10637 self.LogWarning("Some disks for instance %s on node %s were not"
10638 " imported successfully" % (instance, pnode_name))
10640 rename_from = self._old_instance_name
10642 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10643 feedback_fn("* preparing remote import...")
10644 # The source cluster will stop the instance before attempting to make
10645 # a connection. In some cases stopping an instance can take a long
10646 # time, hence the shutdown timeout is added to the connection
10648 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10649 self.op.source_shutdown_timeout)
10650 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10655 self.source_x509_ca,
10656 self._cds, timeouts)
10657 if not compat.all(disk_results):
10658 # TODO: Should the instance still be started, even if some disks
10659 # failed to import (valid for local imports, too)?
10660 self.LogWarning("Some disks for instance %s on node %s were not"
10661 " imported successfully" % (instance, pnode_name))
10663 rename_from = self.source_instance_name
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)
10670 # Run rename script on newly imported instance
10671 assert iobj.name == instance
10672 feedback_fn("Running rename script for %s" % instance)
      result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                 rename_from,
                                                 self.op.debug_level)
10676 if result.fail_msg:
10677 self.LogWarning("Failed to run rename script for %s on node"
10678 " %s: %s" % (instance, pnode_name, result.fail_msg))
10680 assert not self.owned_locks(locking.LEVEL_NODE_RES)
    if self.op.start:
      iobj.admin_state = constants.ADMINST_UP
10684 self.cfg.Update(iobj, feedback_fn)
10685 logging.info("Starting instance %s on node %s", instance, pnode_name)
10686 feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
10689 result.Raise("Could not start instance")
10691 return list(iobj.all_nodes)
10694 class LUInstanceMultiAlloc(NoHooksLU):
10695 """Allocates multiple instances at the same time.
10700 def CheckArguments(self):
10701 """Check arguments.
10705 for inst in self.op.instances:
10706 if inst.iallocator is not None:
10707 raise errors.OpPrereqError("iallocator are not allowed to be set on"
10708 " instance objects", errors.ECODE_INVAL)
10709 nodes.append(bool(inst.pnode))
10710 if inst.disk_template in constants.DTS_INT_MIRROR:
10711 nodes.append(bool(inst.snode))
10713 has_nodes = compat.any(nodes)
10714 if compat.all(nodes) ^ has_nodes:
10715 raise errors.OpPrereqError("There are instance objects providing"
10716 " pnode/snode while others do not",
10717 errors.ECODE_INVAL)
10719 if self.op.iallocator is None:
10720 default_iallocator = self.cfg.GetDefaultIAllocator()
10721 if default_iallocator and has_nodes:
        self.op.iallocator = default_iallocator
      else:
        raise errors.OpPrereqError("No iallocator or nodes on the instances"
10725 " given and no cluster-wide default"
10726 " iallocator found; please specify either"
10727 " an iallocator or nodes on the instances"
10728 " or set a cluster-wide default iallocator",
10729 errors.ECODE_INVAL)
10731 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
    if dups:
      raise errors.OpPrereqError("There are duplicate instance names: %s" %
10734 utils.CommaJoin(dups), errors.ECODE_INVAL)
10736 def ExpandNames(self):
10737 """Calculate the locks.
10740 self.share_locks = _ShareAll()
10741 self.needed_locks = {}
10743 if self.op.iallocator:
10744 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10745 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    else:
      nodeslist = []
      for inst in self.op.instances:
10749 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10750 nodeslist.append(inst.pnode)
10751 if inst.snode is not None:
10752 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10753 nodeslist.append(inst.snode)
10755 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10756 # Lock resources of instance's primary and secondary nodes (copy to
10757 # prevent accidential modification)
10758 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10760 def CheckPrereq(self):
10761 """Check prerequisite.
10764 cluster = self.cfg.GetClusterInfo()
10765 default_vg = self.cfg.GetVGName()
10766 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10767 _ComputeNics(op, cluster, None,
10768 self.cfg, self.proc),
10769 _ComputeFullBeParams(op, cluster))
10770 for op in self.op.instances]
10771 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10772 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10774 ial.Run(self.op.iallocator)
10776 if not ial.success:
10777 raise errors.OpPrereqError("Can't compute nodes using"
10778 " iallocator '%s': %s" %
10779 (self.op.iallocator, ial.info),
10780 errors.ECODE_NORES)
10782 self.ia_result = ial.result
10784 if self.op.dry_run:
      self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
        constants.JOB_IDS_KEY: [],
        })
10789 def _ConstructPartialResult(self):
10790 """Contructs the partial result.
10793 (allocatable, failed) = self.ia_result
10795 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10796 map(compat.fst, allocatable),
10797 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10800 def Exec(self, feedback_fn):
10801 """Executes the opcode.
10804 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10805 (allocatable, failed) = self.ia_result
10808 for (name, nodes) in allocatable:
10809 op = op2inst.pop(name)
10812 (op.pnode, op.snode) = nodes
10814 (op.pnode,) = nodes
10818 missing = set(op2inst.keys()) - set(failed)
10819 assert not missing, \
10820 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
10822 return ResultWithJobs(jobs, **self._ConstructPartialResult())
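    # Illustrative example (editorial, not from the original source): with two
    # allocatable instances and one failed one, this returns one single-opcode
    # job per allocatable instance plus a partial result roughly of the form
    # {"allocatable": ["inst1", "inst2"], "failed": ["inst3"]}.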
10825 def _CheckRADOSFreeSpace():
10826 """Compute disk size requirements inside the RADOS cluster.
10829 # For the RADOS cluster we assume there is always enough space.
10833 class LUInstanceConsole(NoHooksLU):
10834 """Connect to an instance's console.
10836 This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False
10843 def ExpandNames(self):
10844 self.share_locks = _ShareAll()
10845 self._ExpandAndLockInstance()
10847 def CheckPrereq(self):
10848 """Check prerequisites.
10850 This checks that the instance is in the cluster.
10853 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10854 assert self.instance is not None, \
10855 "Cannot retrieve locked instance %s" % self.op.instance_name
10856 _CheckNodeOnline(self, self.instance.primary_node)
10858 def Exec(self, feedback_fn):
10859 """Connect to the console of an instance
10862 instance = self.instance
10863 node = instance.primary_node
10865 node_insts = self.rpc.call_instance_list([node],
10866 [instance.hypervisor])[node]
10867 node_insts.Raise("Can't get node information from %s" % node)
10869 if instance.name not in node_insts.payload:
10870 if instance.admin_state == constants.ADMINST_UP:
10871 state = constants.INSTST_ERRORDOWN
10872 elif instance.admin_state == constants.ADMINST_DOWN:
10873 state = constants.INSTST_ADMINDOWN
10875 state = constants.INSTST_ADMINOFFLINE
10876 raise errors.OpExecError("Instance %s is not running (state %s)" %
10877 (instance.name, state))
10879 logging.debug("Connecting to console of %s on %s", instance.name, node)
10881 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10884 def _GetInstanceConsole(cluster, instance):
10885 """Returns console information for an instance.
10887 @type cluster: L{objects.Cluster}
10888 @type instance: L{objects.Instance}
10892 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10893 # beparams and hvparams are passed separately, to avoid editing the
10894 # instance and then saving the defaults in the instance itself.
10895 hvparams = cluster.FillHV(instance)
10896 beparams = cluster.FillBE(instance)
10897 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10899 assert console.instance == instance.name
10900 assert console.Validate()
10902 return console.ToDict()
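# Editorial note: the returned dictionary is the serialized console object;
# for a Xen instance this is typically an SSH-based console pointing at the
# primary node with the hypervisor's console command.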
10905 class LUInstanceReplaceDisks(LogicalUnit):
10906 """Replace the disks of an instance.
10909 HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
10913 def CheckArguments(self):
10914 """Check arguments.
10917 remote_node = self.op.remote_node
10918 ialloc = self.op.iallocator
10919 if self.op.mode == constants.REPLACE_DISK_CHG:
10920 if remote_node is None and ialloc is None:
10921 raise errors.OpPrereqError("When changing the secondary either an"
10922 " iallocator script must be used or the"
10923 " new node given", errors.ECODE_INVAL)
      else:
        _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10927 elif remote_node is not None or ialloc is not None:
10928 # Not replacing the secondary
10929 raise errors.OpPrereqError("The iallocator and new node options can"
10930 " only be used when changing the"
10931 " secondary node", errors.ECODE_INVAL)
10933 def ExpandNames(self):
10934 self._ExpandAndLockInstance()
10936 assert locking.LEVEL_NODE not in self.needed_locks
10937 assert locking.LEVEL_NODE_RES not in self.needed_locks
10938 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10940 assert self.op.iallocator is None or self.op.remote_node is None, \
10941 "Conflicting options"
10943 if self.op.remote_node is not None:
10944 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10946 # Warning: do not remove the locking of the new secondary here
10947 # unless DRBD8.AddChildren is changed to work in parallel;
10948 # currently it doesn't since parallel invocations of
10949 # FindUnusedMinor will conflict
10950 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10951 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10953 self.needed_locks[locking.LEVEL_NODE] = []
10954 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10956 if self.op.iallocator is not None:
10957 # iallocator will select a new node in the same group
10958 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10960 self.needed_locks[locking.LEVEL_NODE_RES] = []
10962 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10963 self.op.iallocator, self.op.remote_node,
10964 self.op.disks, False, self.op.early_release,
10965 self.op.ignore_ipolicy)
10967 self.tasklets = [self.replacer]
10969 def DeclareLocks(self, level):
10970 if level == locking.LEVEL_NODEGROUP:
10971 assert self.op.remote_node is None
10972 assert self.op.iallocator is not None
10973 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10975 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10976 # Lock all groups used by instance optimistically; this requires going
10977 # via the node before it's locked, requiring verification later on
10978 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10979 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10981 elif level == locking.LEVEL_NODE:
10982 if self.op.iallocator is not None:
10983 assert self.op.remote_node is None
10984 assert not self.needed_locks[locking.LEVEL_NODE]
10986 # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = \
          [node_name
           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()
10993 elif level == locking.LEVEL_NODE_RES:
10995 self.needed_locks[locking.LEVEL_NODE_RES] = \
10996 self.needed_locks[locking.LEVEL_NODE]
10998 def BuildHooksEnv(self):
10999 """Build hooks env.
    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env
11013 def BuildHooksNodes(self):
11014 """Build hooks nodes.
11017 instance = self.replacer.instance
11019 self.cfg.GetMasterNode(),
11020 instance.primary_node,
11022 if self.op.remote_node is not None:
11023 nl.append(self.op.remote_node)
11026 def CheckPrereq(self):
11027 """Check prerequisites.
11030 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11031 self.op.iallocator is None)
11033 # Verify if node group locks are still correct
11034 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11035 if owned_groups:
11036 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11038 return LogicalUnit.CheckPrereq(self)
11041 class TLReplaceDisks(Tasklet):
11042 """Replaces disks for an instance.
11044 Note: Locking is not within the scope of this class.
11047 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11048 disks, delay_iallocator, early_release, ignore_ipolicy):
11049 """Initializes this class.
11052 Tasklet.__init__(self, lu)
11055 self.instance_name = instance_name
11056 self.mode = mode
11057 self.iallocator_name = iallocator_name
11058 self.remote_node = remote_node
11059 self.disks = disks
11060 self.delay_iallocator = delay_iallocator
11061 self.early_release = early_release
11062 self.ignore_ipolicy = ignore_ipolicy
11065 self.instance = None
11066 self.new_node = None
11067 self.target_node = None
11068 self.other_node = None
11069 self.remote_node_info = None
11070 self.node_secondary_ip = None
11073 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11074 """Compute a new secondary node using an IAllocator.
11077 req = iallocator.IAReqRelocate(name=instance_name,
11078 relocate_from=list(relocate_from))
11079 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11081 ial.Run(iallocator_name)
11083 if not ial.success:
11084 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11085 " %s" % (iallocator_name, ial.info),
11086 errors.ECODE_NORES)
11088 remote_node_name = ial.result[0]
11090 lu.LogInfo("Selected new secondary for instance '%s': %s",
11091 instance_name, remote_node_name)
11093 return remote_node_name
11095 def _FindFaultyDisks(self, node_name):
11096 """Wrapper for L{_FindFaultyInstanceDisks}.
11099 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11100 node_name, True)
11102 def _CheckDisksActivated(self, instance):
11103 """Checks if the instance disks are activated.
11105 @param instance: The instance to check disks
11106 @return: True if they are activated, False otherwise
11109 nodes = instance.all_nodes
11111 for idx, dev in enumerate(instance.disks):
11112 for node in nodes:
11113 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11114 self.cfg.SetDiskID(dev, node)
11116 result = _BlockdevFind(self, node, dev, instance)
11118 if result.offline:
11119 continue
11120 elif result.fail_msg or not result.payload:
11121 return False
11123 return True
11125 def CheckPrereq(self):
11126 """Check prerequisites.
11128 This checks that the instance is in the cluster.
11131 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11132 assert instance is not None, \
11133 "Cannot retrieve locked instance %s" % self.instance_name
11135 if instance.disk_template != constants.DT_DRBD8:
11136 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11137 " instances", errors.ECODE_INVAL)
11139 if len(instance.secondary_nodes) != 1:
11140 raise errors.OpPrereqError("The instance has a strange layout,"
11141 " expected one secondary but found %d" %
11142 len(instance.secondary_nodes),
11143 errors.ECODE_FAULT)
11145 if not self.delay_iallocator:
11146 self._CheckPrereq2()
11148 def _CheckPrereq2(self):
11149 """Check prerequisites, second part.
11151 This function should always be part of CheckPrereq. It was separated and is
11152 now called from Exec because during node evacuation iallocator was only
11153 called with an unmodified cluster model, not taking planned changes into
11154 account.
11157 instance = self.instance
11158 secondary_node = instance.secondary_nodes[0]
11160 if self.iallocator_name is None:
11161 remote_node = self.remote_node
11162 else:
11163 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11164 instance.name, instance.secondary_nodes)
11166 if remote_node is None:
11167 self.remote_node_info = None
11168 else:
11169 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11170 "Remote node '%s' is not locked" % remote_node
11172 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11173 assert self.remote_node_info is not None, \
11174 "Cannot retrieve locked node %s" % remote_node
11176 if remote_node == self.instance.primary_node:
11177 raise errors.OpPrereqError("The specified node is the primary node of"
11178 " the instance", errors.ECODE_INVAL)
11180 if remote_node == secondary_node:
11181 raise errors.OpPrereqError("The specified node is already the"
11182 " secondary node of the instance",
11183 errors.ECODE_INVAL)
11185 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11186 constants.REPLACE_DISK_CHG):
11187 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11188 errors.ECODE_INVAL)
11190 if self.mode == constants.REPLACE_DISK_AUTO:
11191 if not self._CheckDisksActivated(instance):
11192 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11193 " first" % self.instance_name,
11194 errors.ECODE_STATE)
11195 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11196 faulty_secondary = self._FindFaultyDisks(secondary_node)
11198 if faulty_primary and faulty_secondary:
11199 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11200 " one node and can not be repaired"
11201 " automatically" % self.instance_name,
11202 errors.ECODE_STATE)
11204 if faulty_primary:
11205 self.disks = faulty_primary
11206 self.target_node = instance.primary_node
11207 self.other_node = secondary_node
11208 check_nodes = [self.target_node, self.other_node]
11209 elif faulty_secondary:
11210 self.disks = faulty_secondary
11211 self.target_node = secondary_node
11212 self.other_node = instance.primary_node
11213 check_nodes = [self.target_node, self.other_node]
11214 else:
11215 self.disks = []
11216 check_nodes = []
11218 else:
11219 # Non-automatic modes
11220 if self.mode == constants.REPLACE_DISK_PRI:
11221 self.target_node = instance.primary_node
11222 self.other_node = secondary_node
11223 check_nodes = [self.target_node, self.other_node]
11225 elif self.mode == constants.REPLACE_DISK_SEC:
11226 self.target_node = secondary_node
11227 self.other_node = instance.primary_node
11228 check_nodes = [self.target_node, self.other_node]
11230 elif self.mode == constants.REPLACE_DISK_CHG:
11231 self.new_node = remote_node
11232 self.other_node = instance.primary_node
11233 self.target_node = secondary_node
11234 check_nodes = [self.new_node, self.other_node]
11236 _CheckNodeNotDrained(self.lu, remote_node)
11237 _CheckNodeVmCapable(self.lu, remote_node)
11239 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11240 assert old_node_info is not None
11241 if old_node_info.offline and not self.early_release:
11242 # doesn't make sense to delay the release
11243 self.early_release = True
11244 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11245 " early-release mode", secondary_node)
11247 else:
11248 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11249 self.mode)
11251 # If not specified all disks should be replaced
11252 if not self.disks:
11253 self.disks = range(len(self.instance.disks))
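# Illustrative example: for an instance with three disks and no explicit
# disk list in the opcode, self.disks becomes [0, 1, 2], i.e. every disk
# of the instance is replaced.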
11255 # TODO: This is ugly, but right now we can't distinguish between internal
11256 # submitted opcode and external one. We should fix that.
11257 if self.remote_node_info:
11258 # We change the node, lets verify it still meets instance policy
11259 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11260 cluster = self.cfg.GetClusterInfo()
11261 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11262 new_group_info)
11263 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11264 ignore=self.ignore_ipolicy)
11266 for node in check_nodes:
11267 _CheckNodeOnline(self.lu, node)
11269 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11270 self.other_node,
11271 self.target_node]
11272 if node_name is not None)
11274 # Release unneeded node and node resource locks
11275 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11276 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11278 # Release any owned node group
11279 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11280 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11282 # Check whether disks are valid
11283 for disk_idx in self.disks:
11284 instance.FindDisk(disk_idx)
11286 # Get secondary node IP addresses
11287 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11288 in self.cfg.GetMultiNodeInfo(touched_nodes))
11290 def Exec(self, feedback_fn):
11291 """Execute disk replacement.
11293 This dispatches the disk replacement to the appropriate handler.
11296 if self.delay_iallocator:
11297 self._CheckPrereq2()
11300 # Verify owned locks before starting operation
11301 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11302 assert set(owned_nodes) == set(self.node_secondary_ip), \
11303 ("Incorrect node locks, owning %s, expected %s" %
11304 (owned_nodes, self.node_secondary_ip.keys()))
11305 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11306 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11308 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11309 assert list(owned_instances) == [self.instance_name], \
11310 "Instance '%s' not locked" % self.instance_name
11312 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11313 "Should not own any node group lock at this point"
11315 if not self.disks:
11316 feedback_fn("No disks need replacement for instance '%s'" %
11317 self.instance.name)
11318 return
11320 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11321 (utils.CommaJoin(self.disks), self.instance.name))
11322 feedback_fn("Current primary node: %s", self.instance.primary_node)
11323 feedback_fn("Current secondary node: %s",
11324 utils.CommaJoin(self.instance.secondary_nodes))
11326 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11328 # Activate the instance disks if we're replacing them on a down instance
11329 if activate_disks:
11330 _StartInstanceDisks(self.lu, self.instance, True)
11332 try:
11333 # Should we replace the secondary node?
11334 if self.new_node is not None:
11335 fn = self._ExecDrbd8Secondary
11336 else:
11337 fn = self._ExecDrbd8DiskOnly
11339 result = fn(feedback_fn)
11340 finally:
11341 # Deactivate the instance disks if we're replacing them on a
11342 # down instance
11343 if activate_disks:
11344 _SafeShutdownInstanceDisks(self.lu, self.instance)
11346 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11349 # Verify owned locks
11350 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11351 nodes = frozenset(self.node_secondary_ip)
11352 assert ((self.early_release and not owned_nodes) or
11353 (not self.early_release and not (set(owned_nodes) - nodes))), \
11354 ("Not owning the correct locks, early_release=%s, owned=%r,"
11355 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11357 return result
11359 def _CheckVolumeGroup(self, nodes):
11360 self.lu.LogInfo("Checking volume groups")
11362 vgname = self.cfg.GetVGName()
11364 # Make sure volume group exists on all involved nodes
11365 results = self.rpc.call_vg_list(nodes)
11366 if not results:
11367 raise errors.OpExecError("Can't list volume groups on the nodes")
11369 for node in nodes:
11370 res = results[node]
11371 res.Raise("Error checking node %s" % node)
11372 if vgname not in res.payload:
11373 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11374 (vgname, node))
11376 def _CheckDisksExistence(self, nodes):
11377 # Check disk existence
11378 for idx, dev in enumerate(self.instance.disks):
11379 if idx not in self.disks:
11380 continue
11382 for node in nodes:
11383 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
11384 self.cfg.SetDiskID(dev, node)
11386 result = _BlockdevFind(self, node, dev, self.instance)
11388 msg = result.fail_msg
11389 if msg or not result.payload:
11390 if not msg:
11391 msg = "disk not found"
11392 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11393 (idx, node, msg))
11395 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11396 for idx, dev in enumerate(self.instance.disks):
11397 if idx not in self.disks:
11398 continue
11400 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11401 (idx, node_name))
11403 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11404 on_primary, ldisk=ldisk):
11405 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11406 " replace disks for instance %s" %
11407 (node_name, self.instance.name))
11409 def _CreateNewStorage(self, node_name):
11410 """Create new storage on the primary or secondary node.
11412 This is only used for same-node replaces, not for changing the
11413 secondary node, hence we don't want to modify the existing disk.
11416 iv_names = {}
11418 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11419 for idx, dev in enumerate(disks):
11420 if idx not in self.disks:
11421 continue
11423 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11425 self.cfg.SetDiskID(dev, node_name)
11427 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11428 names = _GenerateUniqueNames(self.lu, lv_names)
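# A small sketch of the naming scheme (values illustrative only): for
# disk/0 the templates are ".disk0_data" and ".disk0_meta", and
# _GenerateUniqueNames turns each template into a cluster-wide unique LV
# name that is used for the new data and metadata volumes created below.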
11430 (data_disk, meta_disk) = dev.children
11431 vg_data = data_disk.logical_id[0]
11432 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11433 logical_id=(vg_data, names[0]),
11434 params=data_disk.params)
11435 vg_meta = meta_disk.logical_id[0]
11436 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11437 size=constants.DRBD_META_SIZE,
11438 logical_id=(vg_meta, names[1]),
11439 params=meta_disk.params)
11441 new_lvs = [lv_data, lv_meta]
11442 old_lvs = [child.Copy() for child in dev.children]
11443 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11445 # we pass force_create=True to force the LVM creation
11446 for new_lv in new_lvs:
11447 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11448 _GetInstanceInfoText(self.instance), False)
11450 return iv_names
11452 def _CheckDevices(self, node_name, iv_names):
11453 for name, (dev, _, _) in iv_names.iteritems():
11454 self.cfg.SetDiskID(dev, node_name)
11456 result = _BlockdevFind(self, node_name, dev, self.instance)
11458 msg = result.fail_msg
11459 if msg or not result.payload:
11460 if not msg:
11461 msg = "disk not found"
11462 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11463 (name, msg))
11465 if result.payload.is_degraded:
11466 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11468 def _RemoveOldStorage(self, node_name, iv_names):
11469 for name, (_, old_lvs, _) in iv_names.iteritems():
11470 self.lu.LogInfo("Remove logical volumes for %s" % name)
11472 for lv in old_lvs:
11473 self.cfg.SetDiskID(lv, node_name)
11475 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11476 if msg:
11477 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11478 hint="remove unused LVs manually")
11480 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11481 """Replace a disk on the primary or secondary for DRBD 8.
11483 The algorithm for replace is quite complicated:
11485 1. for each disk to be replaced:
11487 1. create new LVs on the target node with unique names
11488 1. detach old LVs from the drbd device
11489 1. rename old LVs to name_replaced.<time_t>
11490 1. rename new LVs to old LVs
11491 1. attach the new LVs (with the old names now) to the drbd device
11493 1. wait for sync across all devices
11495 1. for each modified disk:
11497 1. remove old LVs (which have the name name_replaced.<time_t>)
11499 Failures are not very well handled.
11502 steps_total = 6
11504 # Step: check device activation
11505 self.lu.LogStep(1, steps_total, "Check device existence")
11506 self._CheckDisksExistence([self.other_node, self.target_node])
11507 self._CheckVolumeGroup([self.target_node, self.other_node])
11509 # Step: check other node consistency
11510 self.lu.LogStep(2, steps_total, "Check peer consistency")
11511 self._CheckDisksConsistency(self.other_node,
11512 self.other_node == self.instance.primary_node,
11513 False)
11515 # Step: create new storage
11516 self.lu.LogStep(3, steps_total, "Allocate new storage")
11517 iv_names = self._CreateNewStorage(self.target_node)
11519 # Step: for each lv, detach+rename*2+attach
11520 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11521 for dev, old_lvs, new_lvs in iv_names.itervalues():
11522 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11524 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11525 old_lvs)
11526 result.Raise("Can't detach drbd from local storage on node"
11527 " %s for device %s" % (self.target_node, dev.iv_name))
11529 #cfg.Update(instance)
11531 # ok, we created the new LVs, so now we know we have the needed
11532 # storage; as such, we proceed on the target node to rename
11533 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11534 # using the assumption that logical_id == physical_id (which in
11535 # turn is the unique_id on that node)
11537 # FIXME(iustin): use a better name for the replaced LVs
11538 temp_suffix = int(time.time())
11539 ren_fn = lambda d, suff: (d.physical_id[0],
11540 d.physical_id[1] + "_replaced-%s" % suff)
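# Illustrative example, assuming physical_id is a (vg_name, lv_name) pair:
# ren_fn(old_lv, 1389012345) would map ("xenvg", "disk0_data") to
# ("xenvg", "disk0_data_replaced-1389012345"), the temporary name the old
# LV is parked under until it is removed in the last step.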
11542 # Build the rename list based on what LVs exist on the node
11543 rename_old_to_new = []
11544 for to_ren in old_lvs:
11545 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11546 if not result.fail_msg and result.payload:
11548 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11550 self.lu.LogInfo("Renaming the old LVs on the target node")
11551 result = self.rpc.call_blockdev_rename(self.target_node,
11552 rename_old_to_new)
11553 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11555 # Now we rename the new LVs to the old LVs
11556 self.lu.LogInfo("Renaming the new LVs on the target node")
11557 rename_new_to_old = [(new, old.physical_id)
11558 for old, new in zip(old_lvs, new_lvs)]
11559 result = self.rpc.call_blockdev_rename(self.target_node,
11560 rename_new_to_old)
11561 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11563 # Intermediate steps of in memory modifications
11564 for old, new in zip(old_lvs, new_lvs):
11565 new.logical_id = old.logical_id
11566 self.cfg.SetDiskID(new, self.target_node)
11568 # We need to modify old_lvs so that removal later removes the
11569 # right LVs, not the newly added ones; note that old_lvs is a
11570 # copy here
11571 for disk in old_lvs:
11572 disk.logical_id = ren_fn(disk, temp_suffix)
11573 self.cfg.SetDiskID(disk, self.target_node)
11575 # Now that the new lvs have the old name, we can add them to the device
11576 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11577 result = self.rpc.call_blockdev_addchildren(self.target_node,
11578 (dev, self.instance), new_lvs)
11579 msg = result.fail_msg
11580 if msg:
11581 for new_lv in new_lvs:
11582 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11583 new_lv).fail_msg
11584 if msg2:
11585 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11586 hint=("cleanup manually the unused logical"
11587 " volumes"))
11588 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11590 cstep = itertools.count(5)
11592 if self.early_release:
11593 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11594 self._RemoveOldStorage(self.target_node, iv_names)
11595 # TODO: Check if releasing locks early still makes sense
11596 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11597 else:
11598 # Release all resource locks except those used by the instance
11599 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11600 keep=self.node_secondary_ip.keys())
11602 # Release all node locks while waiting for sync
11603 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11605 # TODO: Can the instance lock be downgraded here? Take the optional disk
11606 # shutdown in the caller into consideration.
11609 # This can fail as the old devices are degraded and _WaitForSync
11610 # does a combined result over all disks, so we don't check its return value
11611 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11612 _WaitForSync(self.lu, self.instance)
11614 # Check all devices manually
11615 self._CheckDevices(self.instance.primary_node, iv_names)
11617 # Step: remove old storage
11618 if not self.early_release:
11619 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11620 self._RemoveOldStorage(self.target_node, iv_names)
11622 def _ExecDrbd8Secondary(self, feedback_fn):
11623 """Replace the secondary node for DRBD 8.
11625 The algorithm for replace is quite complicated:
11626 - for all disks of the instance:
11627 - create new LVs on the new node with same names
11628 - shutdown the drbd device on the old secondary
11629 - disconnect the drbd network on the primary
11630 - create the drbd device on the new secondary
11631 - network attach the drbd on the primary, using an artifice:
11632 the drbd code for Attach() will connect to the network if it
11633 finds a device which is connected to the good local disks but
11634 not network enabled
11635 - wait for sync across all devices
11636 - remove all disks from the old secondary
11638 Failures are not very well handled.
11641 steps_total = 6
11643 pnode = self.instance.primary_node
11645 # Step: check device activation
11646 self.lu.LogStep(1, steps_total, "Check device existence")
11647 self._CheckDisksExistence([self.instance.primary_node])
11648 self._CheckVolumeGroup([self.instance.primary_node])
11650 # Step: check other node consistency
11651 self.lu.LogStep(2, steps_total, "Check peer consistency")
11652 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11654 # Step: create new storage
11655 self.lu.LogStep(3, steps_total, "Allocate new storage")
11656 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11657 for idx, dev in enumerate(disks):
11658 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11659 (self.new_node, idx))
11660 # we pass force_create=True to force LVM creation
11661 for new_lv in dev.children:
11662 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11663 True, _GetInstanceInfoText(self.instance), False)
11665 # Step 4: drbd minors and drbd setup changes
11666 # after this, we must manually remove the drbd minors on both the
11667 # error and the success paths
11668 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11669 minors = self.cfg.AllocateDRBDMinor([self.new_node
11670 for dev in self.instance.disks],
11671 self.instance.name)
11672 logging.debug("Allocated minors %r", minors)
11674 iv_names = {}
11675 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11676 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11677 (self.new_node, idx))
11678 # create new devices on new_node; note that we create two IDs:
11679 # one without port, so the drbd will be activated without
11680 # networking information on the new node at this stage, and one
11681 # with network, for the latter activation in step 4
11682 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11683 if self.instance.primary_node == o_node1:
11684 p_minor = o_minor1
11685 else:
11686 assert self.instance.primary_node == o_node2, "Three-node instance?"
11687 p_minor = o_minor2
11689 new_alone_id = (self.instance.primary_node, self.new_node, None,
11690 p_minor, new_minor, o_secret)
11691 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11692 p_minor, new_minor, o_secret)
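# Sketch of the two IDs built above (all values made up): with primary
# node "node1", new secondary "node3", port 11000, primary minor 0, new
# minor 4 and the existing secret, new_alone_id is
# ("node1", "node3", None, 0, 4, secret) and new_net_id is
# ("node1", "node3", 11000, 0, 4, secret); the port-less variant lets the
# DRBD device be activated without networking first.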
11694 iv_names[idx] = (dev, dev.children, new_net_id)
11695 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11696 new_net_id)
11697 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11698 logical_id=new_alone_id,
11699 children=dev.children,
11700 size=dev.size,
11701 params={})
11702 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11703 self.cfg)
11704 try:
11705 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11706 anno_new_drbd,
11707 _GetInstanceInfoText(self.instance), False)
11708 except errors.GenericError:
11709 self.cfg.ReleaseDRBDMinors(self.instance.name)
11710 raise
11712 # We have new devices, shutdown the drbd on the old secondary
11713 for idx, dev in enumerate(self.instance.disks):
11714 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11715 self.cfg.SetDiskID(dev, self.target_node)
11716 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11717 (dev, self.instance)).fail_msg
11718 if msg:
11719 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11720 " node: %s" % (idx, msg),
11721 hint=("Please cleanup this device manually as"
11722 " soon as possible"))
11724 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11725 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11726 self.instance.disks)[pnode]
11728 msg = result.fail_msg
11729 if msg:
11730 # detaches didn't succeed (unlikely)
11731 self.cfg.ReleaseDRBDMinors(self.instance.name)
11732 raise errors.OpExecError("Can't detach the disks from the network on"
11733 " old node: %s" % (msg,))
11735 # if we managed to detach at least one, we update all the disks of
11736 # the instance to point to the new secondary
11737 self.lu.LogInfo("Updating instance configuration")
11738 for dev, _, new_logical_id in iv_names.itervalues():
11739 dev.logical_id = new_logical_id
11740 self.cfg.SetDiskID(dev, self.instance.primary_node)
11742 self.cfg.Update(self.instance, feedback_fn)
11744 # Release all node locks (the configuration has been updated)
11745 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11747 # and now perform the drbd attach
11748 self.lu.LogInfo("Attaching primary drbds to new secondary"
11749 " (standalone => connected)")
11750 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11751 self.new_node],
11752 self.node_secondary_ip,
11753 (self.instance.disks, self.instance),
11754 self.instance.name,
11755 False)
11756 for to_node, to_result in result.items():
11757 msg = to_result.fail_msg
11758 if msg:
11759 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11760 to_node, msg,
11761 hint=("please do a gnt-instance info to see the"
11762 " status of disks"))
11764 cstep = itertools.count(5)
11766 if self.early_release:
11767 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11768 self._RemoveOldStorage(self.target_node, iv_names)
11769 # TODO: Check if releasing locks early still makes sense
11770 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11771 else:
11772 # Release all resource locks except those used by the instance
11773 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11774 keep=self.node_secondary_ip.keys())
11776 # TODO: Can the instance lock be downgraded here? Take the optional disk
11777 # shutdown in the caller into consideration.
11780 # This can fail as the old devices are degraded and _WaitForSync
11781 # does a combined result over all disks, so we don't check its return value
11782 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11783 _WaitForSync(self.lu, self.instance)
11785 # Check all devices manually
11786 self._CheckDevices(self.instance.primary_node, iv_names)
11788 # Step: remove old storage
11789 if not self.early_release:
11790 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11791 self._RemoveOldStorage(self.target_node, iv_names)
11794 class LURepairNodeStorage(NoHooksLU):
11795 """Repairs the volume group on a node.
11800 def CheckArguments(self):
11801 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11803 storage_type = self.op.storage_type
11805 if (constants.SO_FIX_CONSISTENCY not in
11806 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11807 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11808 " repaired" % storage_type,
11809 errors.ECODE_INVAL)
11811 def ExpandNames(self):
11812 self.needed_locks = {
11813 locking.LEVEL_NODE: [self.op.node_name],
11814 }
11816 def _CheckFaultyDisks(self, instance, node_name):
11817 """Ensure faulty disks abort the opcode or at least warn."""
11818 try:
11819 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11820 node_name, True):
11821 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11822 " node '%s'" % (instance.name, node_name),
11823 errors.ECODE_STATE)
11824 except errors.OpPrereqError, err:
11825 if self.op.ignore_consistency:
11826 self.proc.LogWarning(str(err.args[0]))
11827 else:
11828 raise
11830 def CheckPrereq(self):
11831 """Check prerequisites.
11834 # Check whether any instance on this node has faulty disks
11835 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11836 if inst.admin_state != constants.ADMINST_UP:
11837 continue
11838 check_nodes = set(inst.all_nodes)
11839 check_nodes.discard(self.op.node_name)
11840 for inst_node_name in check_nodes:
11841 self._CheckFaultyDisks(inst, inst_node_name)
11843 def Exec(self, feedback_fn):
11844 feedback_fn("Repairing storage unit '%s' on %s ..." %
11845 (self.op.name, self.op.node_name))
11847 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11848 result = self.rpc.call_storage_execute(self.op.node_name,
11849 self.op.storage_type, st_args,
11850 self.op.name,
11851 constants.SO_FIX_CONSISTENCY)
11852 result.Raise("Failed to repair storage unit '%s' on %s" %
11853 (self.op.name, self.op.node_name))
11856 class LUNodeEvacuate(NoHooksLU):
11857 """Evacuates instances off a list of nodes.
11862 _MODE2IALLOCATOR = {
11863 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11864 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11865 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11866 }
11867 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11868 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11869 constants.IALLOCATOR_NEVAC_MODES)
11871 def CheckArguments(self):
11872 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11874 def ExpandNames(self):
11875 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11877 if self.op.remote_node is not None:
11878 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11879 assert self.op.remote_node
11881 if self.op.remote_node == self.op.node_name:
11882 raise errors.OpPrereqError("Can not use evacuated node as a new"
11883 " secondary node", errors.ECODE_INVAL)
11885 if self.op.mode != constants.NODE_EVAC_SEC:
11886 raise errors.OpPrereqError("Without the use of an iallocator only"
11887 " secondary instances can be evacuated",
11888 errors.ECODE_INVAL)
11891 self.share_locks = _ShareAll()
11892 self.needed_locks = {
11893 locking.LEVEL_INSTANCE: [],
11894 locking.LEVEL_NODEGROUP: [],
11895 locking.LEVEL_NODE: [],
11898 # Determine nodes (via group) optimistically, needs verification once locks
11899 # have been acquired
11900 self.lock_nodes = self._DetermineNodes()
11902 def _DetermineNodes(self):
11903 """Gets the list of nodes to operate on.
11906 if self.op.remote_node is None:
11907 # Iallocator will choose any node(s) in the same group
11908 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11909 else:
11910 group_nodes = frozenset([self.op.remote_node])
11912 # Determine nodes to be locked
11913 return set([self.op.node_name]) | group_nodes
11915 def _DetermineInstances(self):
11916 """Builds list of instances to operate on.
11919 assert self.op.mode in constants.NODE_EVAC_MODES
11921 if self.op.mode == constants.NODE_EVAC_PRI:
11922 # Primary instances only
11923 inst_fn = _GetNodePrimaryInstances
11924 assert self.op.remote_node is None, \
11925 "Evacuating primary instances requires iallocator"
11926 elif self.op.mode == constants.NODE_EVAC_SEC:
11927 # Secondary instances only
11928 inst_fn = _GetNodeSecondaryInstances
11930 else:
11931 assert self.op.mode == constants.NODE_EVAC_ALL
11932 inst_fn = _GetNodeInstances
11933 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11935 raise errors.OpPrereqError("Due to an issue with the iallocator"
11936 " interface it is not possible to evacuate"
11937 " all instances at once; specify explicitly"
11938 " whether to evacuate primary or secondary"
11939 " instances",
11940 errors.ECODE_INVAL)
11942 return inst_fn(self.cfg, self.op.node_name)
11944 def DeclareLocks(self, level):
11945 if level == locking.LEVEL_INSTANCE:
11946 # Lock instances optimistically, needs verification once node and group
11947 # locks have been acquired
11948 self.needed_locks[locking.LEVEL_INSTANCE] = \
11949 set(i.name for i in self._DetermineInstances())
11951 elif level == locking.LEVEL_NODEGROUP:
11952 # Lock node groups for all potential target nodes optimistically, needs
11953 # verification once nodes have been acquired
11954 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11955 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11957 elif level == locking.LEVEL_NODE:
11958 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11960 def CheckPrereq(self):
11962 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11963 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11964 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11966 need_nodes = self._DetermineNodes()
11968 if not owned_nodes.issuperset(need_nodes):
11969 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11970 " locks were acquired, current nodes"
11971 " are '%s', used to be '%s'; retry the"
11972 " operation" %
11973 (self.op.node_name,
11974 utils.CommaJoin(need_nodes),
11975 utils.CommaJoin(owned_nodes)),
11976 errors.ECODE_STATE)
11978 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11979 if owned_groups != wanted_groups:
11980 raise errors.OpExecError("Node groups changed since locks were acquired,"
11981 " current groups are '%s', used to be '%s';"
11982 " retry the operation" %
11983 (utils.CommaJoin(wanted_groups),
11984 utils.CommaJoin(owned_groups)))
11986 # Determine affected instances
11987 self.instances = self._DetermineInstances()
11988 self.instance_names = [i.name for i in self.instances]
11990 if set(self.instance_names) != owned_instances:
11991 raise errors.OpExecError("Instances on node '%s' changed since locks"
11992 " were acquired, current instances are '%s',"
11993 " used to be '%s'; retry the operation" %
11994 (self.op.node_name,
11995 utils.CommaJoin(self.instance_names),
11996 utils.CommaJoin(owned_instances)))
11998 if self.instance_names:
11999 self.LogInfo("Evacuating instances from node '%s': %s",
12000 self.op.node_name,
12001 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12002 else:
12003 self.LogInfo("No instances to evacuate from node '%s'",
12004 self.op.node_name)
12006 if self.op.remote_node is not None:
12007 for i in self.instances:
12008 if i.primary_node == self.op.remote_node:
12009 raise errors.OpPrereqError("Node %s is the primary node of"
12010 " instance %s, cannot use it as"
12011 " secondary" %
12012 (self.op.remote_node, i.name),
12013 errors.ECODE_INVAL)
12015 def Exec(self, feedback_fn):
12016 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12018 if not self.instance_names:
12019 # No instances to evacuate
12020 jobs = []
12022 elif self.op.iallocator is not None:
12023 # TODO: Implement relocation to other group
12024 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12025 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12026 instances=list(self.instance_names))
12027 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12029 ial.Run(self.op.iallocator)
12031 if not ial.success:
12032 raise errors.OpPrereqError("Can't compute node evacuation using"
12033 " iallocator '%s': %s" %
12034 (self.op.iallocator, ial.info),
12035 errors.ECODE_NORES)
12037 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12039 elif self.op.remote_node is not None:
12040 assert self.op.mode == constants.NODE_EVAC_SEC
12041 jobs = [
12042 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12043 remote_node=self.op.remote_node,
12044 disks=[],
12045 mode=constants.REPLACE_DISK_CHG,
12046 early_release=self.op.early_release)]
12047 for instance_name in self.instance_names]
12049 else:
12050 raise errors.ProgrammerError("No iallocator or remote node")
12052 return ResultWithJobs(jobs)
12055 def _SetOpEarlyRelease(early_release, op):
12056 """Sets C{early_release} flag on opcodes if available.
12059 try:
12060 op.early_release = early_release
12061 except AttributeError:
12062 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12064 return op
12067 def _NodeEvacDest(use_nodes, group, nodes):
12068 """Returns group or nodes depending on caller's choice.
12071 if use_nodes:
12072 return utils.CommaJoin(nodes)
12073 else:
12074 return group
12077 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12078 """Unpacks the result of change-group and node-evacuate iallocator requests.
12080 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12081 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12083 @type lu: L{LogicalUnit}
12084 @param lu: Logical unit instance
12085 @type alloc_result: tuple/list
12086 @param alloc_result: Result from iallocator
12087 @type early_release: bool
12088 @param early_release: Whether to release locks early if possible
12089 @type use_nodes: bool
12090 @param use_nodes: Whether to display node names instead of groups
12093 (moved, failed, jobs) = alloc_result
12095 if failed:
12096 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12097 for (name, reason) in failed)
12098 lu.LogWarning("Unable to evacuate instances %s", failreason)
12099 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12101 if moved:
12102 lu.LogInfo("Instances to be moved: %s",
12103 utils.CommaJoin("%s (to %s)" %
12104 (name, _NodeEvacDest(use_nodes, group, nodes))
12105 for (name, group, nodes) in moved))
12107 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12108 map(opcodes.OpCode.LoadOpCode, ops))
12109 for ops in jobs]
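# Rough shape of alloc_result as unpacked above (values illustrative only):
#   moved  = [("inst1.example.com", "group1", ["node3.example.com"])]
#   failed = [("inst2.example.com", "not enough memory")]
#   jobs   = [[<serialized opcode dict>, ...], ...]
# Each inner list of serialized opcodes becomes one job to submit.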
12112 def _DiskSizeInBytesToMebibytes(lu, size):
12113 """Converts a disk size in bytes to mebibytes.
12115 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12118 (mib, remainder) = divmod(size, 1024 * 1024)
12120 if remainder != 0:
12121 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12122 " to not overwrite existing data (%s bytes will not be"
12123 " wiped)", (1024 * 1024) - remainder)
12124 mib += 1
12126 return mib
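# Worked example (hypothetical size): for size = 1073741825, i.e. 1 GiB
# plus one byte, divmod yields mib = 1024 and remainder = 1, so a warning
# about 1048575 unwiped bytes is logged and the function returns 1025.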
12129 class LUInstanceGrowDisk(LogicalUnit):
12130 """Grow a disk of an instance.
12133 HPATH = "disk-grow"
12134 HTYPE = constants.HTYPE_INSTANCE
12137 def ExpandNames(self):
12138 self._ExpandAndLockInstance()
12139 self.needed_locks[locking.LEVEL_NODE] = []
12140 self.needed_locks[locking.LEVEL_NODE_RES] = []
12141 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12142 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12144 def DeclareLocks(self, level):
12145 if level == locking.LEVEL_NODE:
12146 self._LockInstancesNodes()
12147 elif level == locking.LEVEL_NODE_RES:
12149 self.needed_locks[locking.LEVEL_NODE_RES] = \
12150 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12152 def BuildHooksEnv(self):
12153 """Build hooks env.
12155 This runs on the master, the primary and all the secondaries.
12159 "DISK": self.op.disk,
12160 "AMOUNT": self.op.amount,
12161 "ABSOLUTE": self.op.absolute,
12163 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12166 def BuildHooksNodes(self):
12167 """Build hooks nodes.
12170 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12173 def CheckPrereq(self):
12174 """Check prerequisites.
12176 This checks that the instance is in the cluster.
12179 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12180 assert instance is not None, \
12181 "Cannot retrieve locked instance %s" % self.op.instance_name
12182 nodenames = list(instance.all_nodes)
12183 for node in nodenames:
12184 _CheckNodeOnline(self, node)
12186 self.instance = instance
12188 if instance.disk_template not in constants.DTS_GROWABLE:
12189 raise errors.OpPrereqError("Instance's disk layout does not support"
12190 " growing", errors.ECODE_INVAL)
12192 self.disk = instance.FindDisk(self.op.disk)
12194 if self.op.absolute:
12195 self.target = self.op.amount
12196 self.delta = self.target - self.disk.size
12197 if self.delta < 0:
12198 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12199 "current disk size (%s)" %
12200 (utils.FormatUnit(self.target, "h"),
12201 utils.FormatUnit(self.disk.size, "h")),
12202 errors.ECODE_STATE)
12203 else:
12204 self.delta = self.op.amount
12205 self.target = self.disk.size + self.delta
12206 if self.delta < 0:
12207 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12208 utils.FormatUnit(self.delta, "h"),
12209 errors.ECODE_INVAL)
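# Illustrative numbers: growing a 1024 MiB disk with absolute=True and
# amount=2048 gives delta=1024 and target=2048; with absolute=False and
# amount=512 the delta is 512 and the target 1536.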
12211 if instance.disk_template not in (constants.DT_FILE,
12212 constants.DT_SHARED_FILE,
12213 constants.DT_RBD):
12214 # TODO: check the free disk space for file, when that feature will be
12215 # supported
12216 _CheckNodesFreeDiskPerVG(self, nodenames,
12217 self.disk.ComputeGrowth(self.delta))
12219 def Exec(self, feedback_fn):
12220 """Execute disk grow.
12223 instance = self.instance
12224 disk = self.disk
12226 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12227 assert (self.owned_locks(locking.LEVEL_NODE) ==
12228 self.owned_locks(locking.LEVEL_NODE_RES))
12230 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12232 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12233 if not disks_ok:
12234 raise errors.OpExecError("Cannot activate block device to grow")
12236 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12237 (self.op.disk, instance.name,
12238 utils.FormatUnit(self.delta, "h"),
12239 utils.FormatUnit(self.target, "h")))
12241 # First run all grow ops in dry-run mode
12242 for node in instance.all_nodes:
12243 self.cfg.SetDiskID(disk, node)
12244 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12246 result.Raise("Dry-run grow request failed to node %s" % node)
12248 if wipe_disks:
12249 # Get disk size from primary node for wiping
12250 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12251 result.Raise("Failed to retrieve disk size from node '%s'" %
12252 instance.primary_node)
12254 (disk_size_in_bytes, ) = result.payload
12256 if disk_size_in_bytes is None:
12257 raise errors.OpExecError("Failed to retrieve disk size from primary"
12258 " node '%s'" % instance.primary_node)
12260 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12262 assert old_disk_size >= disk.size, \
12263 ("Retrieved disk size too small (got %s, should be at least %s)" %
12264 (old_disk_size, disk.size))
12265 else:
12266 old_disk_size = None
12268 # We know that (as far as we can test) operations across different
12269 # nodes will succeed, time to run it for real on the backing storage
12270 for node in instance.all_nodes:
12271 self.cfg.SetDiskID(disk, node)
12272 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12274 result.Raise("Grow request failed to node %s" % node)
12276 # And now execute it for logical storage, on the primary node
12277 node = instance.primary_node
12278 self.cfg.SetDiskID(disk, node)
12279 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12281 result.Raise("Grow request failed to node %s" % node)
12283 disk.RecordGrow(self.delta)
12284 self.cfg.Update(instance, feedback_fn)
12286 # Changes have been recorded, release node lock
12287 _ReleaseLocks(self, locking.LEVEL_NODE)
12289 # Downgrade lock while waiting for sync
12290 self.glm.downgrade(locking.LEVEL_INSTANCE)
12292 assert wipe_disks ^ (old_disk_size is None)
12294 if wipe_disks:
12295 assert instance.disks[self.op.disk] == disk
12297 # Wipe newly added disk space
12298 _WipeDisks(self, instance,
12299 disks=[(self.op.disk, disk, old_disk_size)])
12301 if self.op.wait_for_sync:
12302 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12304 self.proc.LogWarning("Disk sync-ing has not returned a good"
12305 " status; please check the instance")
12306 if instance.admin_state != constants.ADMINST_UP:
12307 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12308 elif instance.admin_state != constants.ADMINST_UP:
12309 self.proc.LogWarning("Not shutting down the disk even if the instance is"
12310 " not supposed to be running because no wait for"
12311 " sync mode was requested")
12313 assert self.owned_locks(locking.LEVEL_NODE_RES)
12314 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12317 class LUInstanceQueryData(NoHooksLU):
12318 """Query runtime instance data.
12323 def ExpandNames(self):
12324 self.needed_locks = {}
12326 # Use locking if requested or when non-static information is wanted
12327 if not (self.op.static or self.op.use_locking):
12328 self.LogWarning("Non-static data requested, locks need to be acquired")
12329 self.op.use_locking = True
12331 if self.op.instances or not self.op.use_locking:
12332 # Expand instance names right here
12333 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12334 else:
12335 # Will use acquired locks
12336 self.wanted_names = None
12338 if self.op.use_locking:
12339 self.share_locks = _ShareAll()
12341 if self.wanted_names is None:
12342 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12343 else:
12344 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12346 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12347 self.needed_locks[locking.LEVEL_NODE] = []
12348 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12350 def DeclareLocks(self, level):
12351 if self.op.use_locking:
12352 if level == locking.LEVEL_NODEGROUP:
12353 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12355 # Lock all groups used by instances optimistically; this requires going
12356 # via the node before it's locked, requiring verification later on
12357 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12358 frozenset(group_uuid
12359 for instance_name in owned_instances
12360 for group_uuid in
12361 self.cfg.GetInstanceNodeGroups(instance_name))
12363 elif level == locking.LEVEL_NODE:
12364 self._LockInstancesNodes()
12366 def CheckPrereq(self):
12367 """Check prerequisites.
12369 This only checks the optional instance list against the existing names.
12372 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12373 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12374 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12376 if self.wanted_names is None:
12377 assert self.op.use_locking, "Locking was not used"
12378 self.wanted_names = owned_instances
12380 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12382 if self.op.use_locking:
12383 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12384 None)
12385 else:
12386 assert not (owned_instances or owned_groups or owned_nodes)
12388 self.wanted_instances = instances.values()
12390 def _ComputeBlockdevStatus(self, node, instance, dev):
12391 """Returns the status of a block device
12394 if self.op.static or not node:
12395 return None
12397 self.cfg.SetDiskID(dev, node)
12399 result = self.rpc.call_blockdev_find(node, dev)
12403 result.Raise("Can't compute disk status for %s" % instance.name)
12405 status = result.payload
12409 return (status.dev_path, status.major, status.minor,
12410 status.sync_percent, status.estimated_time,
12411 status.is_degraded, status.ldisk_status)
12413 def _ComputeDiskStatus(self, instance, snode, dev):
12414 """Compute block device status.
12417 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12419 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12421 def _ComputeDiskStatusInner(self, instance, snode, dev):
12422 """Compute block device status.
12424 @attention: The device has to be annotated already.
12427 if dev.dev_type in constants.LDS_DRBD:
12428 # we change the snode then (otherwise we use the one passed in)
12429 if dev.logical_id[0] == instance.primary_node:
12430 snode = dev.logical_id[1]
12432 snode = dev.logical_id[0]
12434 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12435 instance, dev)
12436 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12438 if dev.children:
12439 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12440 instance, snode),
12441 dev.children)
12442 else:
12443 dev_children = []
12445 return {
12446 "iv_name": dev.iv_name,
12447 "dev_type": dev.dev_type,
12448 "logical_id": dev.logical_id,
12449 "physical_id": dev.physical_id,
12450 "pstatus": dev_pstatus,
12451 "sstatus": dev_sstatus,
12452 "children": dev_children,
12453 "mode": dev.mode,
12454 "size": dev.size,
12455 }
12457 def Exec(self, feedback_fn):
12458 """Gather and return data"""
12459 result = {}
12461 cluster = self.cfg.GetClusterInfo()
12463 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12464 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12466 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12467 for node in nodes.values()))
12469 group2name_fn = lambda uuid: groups[uuid].name
12471 for instance in self.wanted_instances:
12472 pnode = nodes[instance.primary_node]
12474 if self.op.static or pnode.offline:
12475 remote_state = None
12476 if pnode.offline:
12477 self.LogWarning("Primary node %s is marked offline, returning static"
12478 " information only for instance %s" %
12479 (pnode.name, instance.name))
12480 else:
12481 remote_info = self.rpc.call_instance_info(instance.primary_node,
12482 instance.name,
12483 instance.hypervisor)
12484 remote_info.Raise("Error checking node %s" % instance.primary_node)
12485 remote_info = remote_info.payload
12486 if remote_info and "state" in remote_info:
12487 remote_state = "up"
12488 else:
12489 if instance.admin_state == constants.ADMINST_UP:
12490 remote_state = "down"
12491 else:
12492 remote_state = instance.admin_state
12494 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12495 instance.disks)
12497 snodes_group_uuids = [nodes[snode_name].group
12498 for snode_name in instance.secondary_nodes]
12500 result[instance.name] = {
12501 "name": instance.name,
12502 "config_state": instance.admin_state,
12503 "run_state": remote_state,
12504 "pnode": instance.primary_node,
12505 "pnode_group_uuid": pnode.group,
12506 "pnode_group_name": group2name_fn(pnode.group),
12507 "snodes": instance.secondary_nodes,
12508 "snodes_group_uuids": snodes_group_uuids,
12509 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12511 # this happens to be the same format used for hooks
12512 "nics": _NICListToTuple(self, instance.nics),
12513 "disk_template": instance.disk_template,
12514 "disks": disks,
12515 "hypervisor": instance.hypervisor,
12516 "network_port": instance.network_port,
12517 "hv_instance": instance.hvparams,
12518 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12519 "be_instance": instance.beparams,
12520 "be_actual": cluster.FillBE(instance),
12521 "os_instance": instance.osparams,
12522 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12523 "serial_no": instance.serial_no,
12524 "mtime": instance.mtime,
12525 "ctime": instance.ctime,
12526 "uuid": instance.uuid,
12527 }
12529 return result
12532 def PrepareContainerMods(mods, private_fn):
12533 """Prepares a list of container modifications by adding a private data field.
12535 @type mods: list of tuples; (operation, index, parameters)
12536 @param mods: List of modifications
12537 @type private_fn: callable or None
12538 @param private_fn: Callable for constructing a private data field for a
12543 if private_fn is None:
12544 fn = lambda: None
12545 else:
12546 fn = private_fn
12548 return [(op, idx, params, fn()) for (op, idx, params) in mods]
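# Minimal usage sketch (hypothetical input): with private_fn=None,
# [(constants.DDM_ADD, -1, {"size": 1024})] becomes
# [(constants.DDM_ADD, -1, {"size": 1024}, None)], i.e. every modification
# simply gains a private data slot.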
12551 #: Type description for changes as returned by L{ApplyContainerMods}'s
12552 #: callbacks
12553 _TApplyContModsCbChanges = \
12554 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12555 ht.TNonEmptyString,
12556 ht.TAny,
12557 ])))
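# Example of a value accepted by this check (made-up change): the callbacks
# may return None or a list such as [("disk/0", "remove")], i.e. pairs of a
# non-empty description string and an arbitrary value.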
12560 def ApplyContainerMods(kind, container, chgdesc, mods,
12561 create_fn, modify_fn, remove_fn):
12562 """Applies descriptions in C{mods} to C{container}.
12565 @param kind: One-word item description
12566 @type container: list
12567 @param container: Container to modify
12568 @type chgdesc: None or list
12569 @param chgdesc: List of applied changes
12571 @param mods: Modifications as returned by L{PrepareContainerMods}
12572 @type create_fn: callable
12573 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12574 receives absolute item index, parameters and private data object as added
12575 by L{PrepareContainerMods}, returns tuple containing new item and changes
12577 @type modify_fn: callable
12578 @param modify_fn: Callback for modifying an existing item
12579 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12580 and private data object as added by L{PrepareContainerMods}, returns
12582 @type remove_fn: callable
12583 @param remove_fn: Callback on removing item; receives absolute item index,
12584 item and private data object as added by L{PrepareContainerMods}
12587 for (op, idx, params, private) in mods:
12588 if idx == -1:
12589 # Append
12590 absidx = len(container) - 1
12591 elif idx < 0:
12592 raise IndexError("Not accepting negative indices other than -1")
12593 elif idx > len(container):
12594 raise IndexError("Got %s index %s, but there are only %s" %
12595 (kind, idx, len(container)))
12596 else:
12597 absidx = idx
12599 changes = None
12601 if op == constants.DDM_ADD:
12602 # Calculate where item will be added
12603 if idx == -1:
12604 addidx = len(container)
12605 else:
12606 addidx = idx
12608 if create_fn is None:
12609 item = params
12610 else:
12611 (item, changes) = create_fn(addidx, params, private)
12613 if idx == -1:
12614 container.append(item)
12616 else:
12617 assert idx <= len(container)
12618 # list.insert does so before the specified index
12619 container.insert(idx, item)
12620 else:
12621 # Retrieve existing item
12622 try:
12623 item = container[absidx]
12624 except IndexError:
12625 raise IndexError("Invalid %s index %s" % (kind, idx))
12627 if op == constants.DDM_REMOVE:
12630 if remove_fn is not None:
12631 remove_fn(absidx, item, private)
12633 changes = [("%s/%s" % (kind, absidx), "remove")]
12635 assert container[absidx] == item
12636 del container[absidx]
12637 elif op == constants.DDM_MODIFY:
12638 if modify_fn is not None:
12639 changes = modify_fn(absidx, item, params, private)
12640 else:
12641 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12643 assert _TApplyContModsCbChanges(changes)
12645 if not (chgdesc is None or changes is None):
12646 chgdesc.extend(changes)
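# Minimal usage sketch (names and values illustrative): applying
#   mods = PrepareContainerMods([(constants.DDM_REMOVE, 0, {})], None)
# to a list of NIC objects via
#   ApplyContainerMods("nic", nics, chgdesc, mods, None, None, None)
# removes nics[0] and appends ("nic/0", "remove") to chgdesc.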
12649 def _UpdateIvNames(base_index, disks):
12650 """Updates the C{iv_name} attribute of disks.
12652 @type disks: list of L{objects.Disk}
12655 for (idx, disk) in enumerate(disks):
12656 disk.iv_name = "disk/%s" % (base_index + idx, )
12659 class _InstNicModPrivate:
12660 """Data structure for network interface modifications.
12662 Used by L{LUInstanceSetParams}.
12665 def __init__(self):
12666 self.params = None
12667 self.filled = None
12670 class LUInstanceSetParams(LogicalUnit):
12671 """Modifies an instances's parameters.
12674 HPATH = "instance-modify"
12675 HTYPE = constants.HTYPE_INSTANCE
12679 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12680 assert ht.TList(mods)
12681 assert not mods or len(mods[0]) in (2, 3)
12683 if mods and len(mods[0]) == 2:
12684 result = []
12686 addremove = 0
12687 for op, params in mods:
12688 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12689 result.append((op, -1, params))
12690 addremove += 1
12692 if addremove > 1:
12693 raise errors.OpPrereqError("Only one %s add or remove operation is"
12694 " supported at a time" % kind,
12695 errors.ECODE_INVAL)
12696 else:
12697 result.append((constants.DDM_MODIFY, op, params))
12699 assert verify_fn(result)
12700 else:
12701 result = mods
12703 return result
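# Sketch of the upgrade performed above (hypothetical input): the old
# two-element form [(constants.DDM_ADD, {"size": 1024}), (2, {"mode": "ro"})]
# becomes [(constants.DDM_ADD, -1, {"size": 1024}),
# (constants.DDM_MODIFY, 2, {"mode": "ro"})]; input that is already in the
# three-element form is returned unchanged.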
12706 def _CheckMods(kind, mods, key_types, item_fn):
12707 """Ensures requested disk/NIC modifications are valid.
12710 for (op, _, params) in mods:
12711 assert ht.TDict(params)
12713 utils.ForceDictType(params, key_types)
12715 if op == constants.DDM_REMOVE:
12716 if params:
12717 raise errors.OpPrereqError("No settings should be passed when"
12718 " removing a %s" % kind,
12719 errors.ECODE_INVAL)
12720 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12721 item_fn(op, params)
12723 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12726 def _VerifyDiskModification(op, params):
12727 """Verifies a disk modification.
12730 if op == constants.DDM_ADD:
12731 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12732 if mode not in constants.DISK_ACCESS_SET:
12733 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12734 errors.ECODE_INVAL)
12736 size = params.get(constants.IDISK_SIZE, None)
12737 if size is None:
12738 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12739 constants.IDISK_SIZE, errors.ECODE_INVAL)
12741 try:
12742 size = int(size)
12743 except (TypeError, ValueError), err:
12744 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12745 errors.ECODE_INVAL)
12747 params[constants.IDISK_SIZE] = size
12749 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12750 raise errors.OpPrereqError("Disk size change not possible, use"
12751 " grow-disk", errors.ECODE_INVAL)
12754 def _VerifyNicModification(op, params):
12755 """Verifies a network interface modification.
12758 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12759 ip = params.get(constants.INIC_IP, None)
12760 req_net = params.get(constants.INIC_NETWORK, None)
12761 link = params.get(constants.NIC_LINK, None)
12762 mode = params.get(constants.NIC_MODE, None)
12763 if req_net is not None:
12764 if req_net.lower() == constants.VALUE_NONE:
12765 params[constants.INIC_NETWORK] = None
12766 req_net = None
12767 elif link is not None or mode is not None:
12768 raise errors.OpPrereqError("If a network is given, mode or link"
12769 " should not be given",
12770 errors.ECODE_INVAL)
12772 if op == constants.DDM_ADD:
12773 macaddr = params.get(constants.INIC_MAC, None)
12774 if macaddr is None:
12775 params[constants.INIC_MAC] = constants.VALUE_AUTO
12777 if ip is not None:
12778 if ip.lower() == constants.VALUE_NONE:
12779 params[constants.INIC_IP] = None
12780 else:
12781 if ip.lower() == constants.NIC_IP_POOL:
12782 if op == constants.DDM_ADD and req_net is None:
12783 raise errors.OpPrereqError("If ip=pool, parameter network"
12784 " must be passed too",
12785 errors.ECODE_INVAL)
12787 elif not netutils.IPAddress.IsValid(ip):
12788 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12789 errors.ECODE_INVAL)
12791 if constants.INIC_MAC in params:
12792 macaddr = params[constants.INIC_MAC]
12793 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12794 macaddr = utils.NormalizeAndValidateMac(macaddr)
12796 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12797 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12798 " modifying an existing NIC",
12799 errors.ECODE_INVAL)
12801 def CheckArguments(self):
12802 if not (self.op.nics or self.op.disks or self.op.disk_template or
12803 self.op.hvparams or self.op.beparams or self.op.os_name or
12804 self.op.offline is not None or self.op.runtime_mem):
12805 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12807 if self.op.hvparams:
12808 _CheckGlobalHvParams(self.op.hvparams)
12810 self.op.disks = self._UpgradeDiskNicMods(
12811 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12812 self.op.nics = self._UpgradeDiskNicMods(
12813 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12815 # Check disk modifications
12816 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12817 self._VerifyDiskModification)
12819 if self.op.disks and self.op.disk_template is not None:
12820 raise errors.OpPrereqError("Disk template conversion and other disk"
12821 " changes not supported at the same time",
12822 errors.ECODE_INVAL)
12824 if (self.op.disk_template and
12825 self.op.disk_template in constants.DTS_INT_MIRROR and
12826 self.op.remote_node is None):
12827 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12828 " one requires specifying a secondary node",
12829 errors.ECODE_INVAL)
12831 # Check NIC modifications
12832 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12833 self._VerifyNicModification)
12835 def ExpandNames(self):
12836 self._ExpandAndLockInstance()
12837 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12838 # Can't even acquire node locks in shared mode as upcoming changes in
12839 # Ganeti 2.6 will start to modify the node object on disk conversion
12840 self.needed_locks[locking.LEVEL_NODE] = []
12841 self.needed_locks[locking.LEVEL_NODE_RES] = []
12842 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12843 # Lock the node group to look up the ipolicy
12844 self.share_locks[locking.LEVEL_NODEGROUP] = 1
12846 def DeclareLocks(self, level):
12847 if level == locking.LEVEL_NODEGROUP:
12848 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12849 # Acquire locks for the instance's nodegroups optimistically. Needs
12850 # to be verified in CheckPrereq
12851 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12852 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12853 elif level == locking.LEVEL_NODE:
12854 self._LockInstancesNodes()
12855 if self.op.disk_template and self.op.remote_node:
12856 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12857 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12858 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12860 self.needed_locks[locking.LEVEL_NODE_RES] = \
12861 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12863 def BuildHooksEnv(self):
12864 """Build hooks env.
12866 This runs on the master, primary and secondaries.
12870 if constants.BE_MINMEM in self.be_new:
12871 args["minmem"] = self.be_new[constants.BE_MINMEM]
12872 if constants.BE_MAXMEM in self.be_new:
12873 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12874 if constants.BE_VCPUS in self.be_new:
12875 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12876 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12877 # information at all.
12879 if self._new_nics is not None:
12882 for nic in self._new_nics:
12883 n = copy.deepcopy(nic)
12884 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12885 n.nicparams = nicparams
12886 nics.append(_NICToTuple(self, n))
12888 args["nics"] = nics
12890 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12891 if self.op.disk_template:
12892 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12893 if self.op.runtime_mem:
12894 env["RUNTIME_MEMORY"] = self.op.runtime_mem
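# Hedged sketch of the hook-environment overrides assembled above; the
# numeric values are invented for illustration:
#
#   args = {"minmem": 512, "maxmem": 1024, "vcpus": 2,
#           "nics": [...]}  # one _NICToTuple() entry per NIC
#
# "NEW_DISK_TEMPLATE" and "RUNTIME_MEMORY" are only added to the resulting
# environment when the corresponding opcode fields are set.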
12898 def BuildHooksNodes(self):
12899 """Build hooks nodes.
12902 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12905 def _PrepareNicModification(self, params, private, old_ip, old_net,
12906 old_params, cluster, pnode):
12908 update_params_dict = dict([(key, params[key])
12909 for key in constants.NICS_PARAMETERS
12912 req_link = update_params_dict.get(constants.NIC_LINK, None)
12913 req_mode = update_params_dict.get(constants.NIC_MODE, None)
12915 new_net = params.get(constants.INIC_NETWORK, old_net)
12916 if new_net is not None:
12917 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
12918 if netparams is None:
12919 raise errors.OpPrereqError("No netparams found for the network"
12920 " %s, probably not connected." % new_net,
12921 errors.ECODE_INVAL)
12922 new_params = dict(netparams)
12924 new_params = _GetUpdatedParams(old_params, update_params_dict)
12926 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12928 new_filled_params = cluster.SimpleFillNIC(new_params)
12929 objects.NIC.CheckParameterSyntax(new_filled_params)
12931 new_mode = new_filled_params[constants.NIC_MODE]
12932 if new_mode == constants.NIC_MODE_BRIDGED:
12933 bridge = new_filled_params[constants.NIC_LINK]
12934 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12936 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12938 self.warn.append(msg)
12940 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12942 elif new_mode == constants.NIC_MODE_ROUTED:
12943 ip = params.get(constants.INIC_IP, old_ip)
12945 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12946 " on a routed NIC", errors.ECODE_INVAL)
12948 if constants.INIC_MAC in params:
12949 mac = params[constants.INIC_MAC]
12951 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12952 errors.ECODE_INVAL)
12953 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12954 # otherwise generate the MAC address
12955 params[constants.INIC_MAC] = \
12956 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12958 # or validate/reserve the current one
12960 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12961 except errors.ReservationError:
12962 raise errors.OpPrereqError("MAC address '%s' already in use"
12963 " in cluster" % mac,
12964 errors.ECODE_NOTUNIQUE)
12965 elif new_net != old_net:
12967 def get_net_prefix(net):
12969 uuid = self.cfg.LookupNetwork(net)
12971 nobj = self.cfg.GetNetwork(uuid)
12972 return nobj.mac_prefix
12975 new_prefix = get_net_prefix(new_net)
12976 old_prefix = get_net_prefix(old_net)
12977 if old_prefix != new_prefix:
12978 params[constants.INIC_MAC] = \
12979 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12981 # if there is a change in nic-network configuration
12982 new_ip = params.get(constants.INIC_IP, old_ip)
12983 if (new_ip, new_net) != (old_ip, old_net):
12986 if new_ip.lower() == constants.NIC_IP_POOL:
12988 new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
12989 except errors.ReservationError:
12990 raise errors.OpPrereqError("Unable to get a free IP"
12991 " from the address pool",
12992 errors.ECODE_STATE)
12993 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
12994 params[constants.INIC_IP] = new_ip
12995 elif new_ip != old_ip or new_net != old_net:
12997 self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
12998 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
12999 except errors.ReservationError:
13000 raise errors.OpPrereqError("IP %s not available in network %s" %
13002 errors.ECODE_NOTUNIQUE)
13003 elif new_ip.lower() == constants.NIC_IP_POOL:
13004 raise errors.OpPrereqError("ip=pool, but no network found",
13005 errors.ECODE_INVAL)
13008 if self.op.conflicts_check:
13009 _CheckForConflictingIp(self, new_ip, pnode)
13014 self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
13015 except errors.AddressPoolError:
13016 logging.warning("Could not release IP %s: not contained in network %s",
13019 # there are no changes in (net, ip) tuple
13020 elif (old_net is not None and
13021 (req_link is not None or req_mode is not None)):
13022 raise errors.OpPrereqError("Not allowed to change link or mode of"
13023 " a NIC that is connected to a network.",
13024 errors.ECODE_INVAL)
13026 private.params = new_params
13027 private.filled = new_filled_params
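# Descriptive note on the MAC handling above (not original code): an
# explicitly given MAC is reserved via cfg.ReserveMAC and must be unique in
# the cluster, "auto"/"generate" triggers cfg.GenerateMAC, and when only the
# network changes a fresh MAC is generated if the old and new networks use
# different MAC prefixes.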
13029 def CheckPrereq(self):
13030 """Check prerequisites.
13032 This only checks the instance list against the existing names.
13035 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13036 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13038 cluster = self.cluster = self.cfg.GetClusterInfo()
13039 assert self.instance is not None, \
13040 "Cannot retrieve locked instance %s" % self.op.instance_name
13042 pnode = instance.primary_node
13043 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13044 nodelist = list(instance.all_nodes)
13045 pnode_info = self.cfg.GetNodeInfo(pnode)
13046 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13048 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13049 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13050 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13052 # dictionary with instance information after the modification
13055 # Prepare disk/NIC modifications
13056 self.diskmod = PrepareContainerMods(self.op.disks, None)
13057 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13060 if self.op.os_name and not self.op.force:
13061 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13062 self.op.force_variant)
13063 instance_os = self.op.os_name
13065 instance_os = instance.os
13067 assert not (self.op.disk_template and self.op.disks), \
13068 "Can't modify disk template and apply disk changes at the same time"
13070 if self.op.disk_template:
13071 if instance.disk_template == self.op.disk_template:
13072 raise errors.OpPrereqError("Instance already has disk template %s" %
13073 instance.disk_template, errors.ECODE_INVAL)
13075 if (instance.disk_template,
13076 self.op.disk_template) not in self._DISK_CONVERSIONS:
13077 raise errors.OpPrereqError("Unsupported disk template conversion from"
13078 " %s to %s" % (instance.disk_template,
13079 self.op.disk_template),
13080 errors.ECODE_INVAL)
13081 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13082 msg="cannot change disk template")
13083 if self.op.disk_template in constants.DTS_INT_MIRROR:
13084 if self.op.remote_node == pnode:
13085 raise errors.OpPrereqError("Given new secondary node %s is the same"
13086 " as the primary node of the instance" %
13087 self.op.remote_node, errors.ECODE_STATE)
13088 _CheckNodeOnline(self, self.op.remote_node)
13089 _CheckNodeNotDrained(self, self.op.remote_node)
13090 # FIXME: here we assume that the old instance type is DT_PLAIN
13091 assert instance.disk_template == constants.DT_PLAIN
13092 disks = [{constants.IDISK_SIZE: d.size,
13093 constants.IDISK_VG: d.logical_id[0]}
13094 for d in instance.disks]
13095 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13096 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13098 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13099 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13100 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13102 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13103 ignore=self.op.ignore_ipolicy)
13104 if pnode_info.group != snode_info.group:
13105 self.LogWarning("The primary and secondary nodes are in two"
13106 " different node groups; the disk parameters"
13107 " from the first disk's node group will be"
13110 # hvparams processing
13111 if self.op.hvparams:
13112 hv_type = instance.hypervisor
13113 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13114 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13115 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13118 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
13119 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13120 self.hv_proposed = self.hv_new = hv_new # the new actual values
13121 self.hv_inst = i_hvdict # the new dict (without defaults)
13123 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13125 self.hv_new = self.hv_inst = {}
13127 # beparams processing
13128 if self.op.beparams:
13129 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13131 objects.UpgradeBeParams(i_bedict)
13132 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13133 be_new = cluster.SimpleFillBE(i_bedict)
13134 self.be_proposed = self.be_new = be_new # the new actual values
13135 self.be_inst = i_bedict # the new dict (without defaults)
13137 self.be_new = self.be_inst = {}
13138 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13139 be_old = cluster.FillBE(instance)
13141 # CPU param validation -- checking every time a parameter is
13142 # changed to cover all cases where either CPU mask or vcpus have
13144 if (constants.BE_VCPUS in self.be_proposed and
13145 constants.HV_CPU_MASK in self.hv_proposed):
13147 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13148 # Verify mask is consistent with number of vCPUs. Can skip this
13149 # test if only 1 entry in the CPU mask, which means same mask
13150 # is applied to all vCPUs.
13151 if (len(cpu_list) > 1 and
13152 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13153 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13155 (self.be_proposed[constants.BE_VCPUS],
13156 self.hv_proposed[constants.HV_CPU_MASK]),
13157 errors.ECODE_INVAL)
13159 # Only perform this test if a new CPU mask is given
13160 if constants.HV_CPU_MASK in self.hv_new:
13161 # Calculate the largest CPU number requested
13162 max_requested_cpu = max(map(max, cpu_list))
13163 # Check that all of the instance's nodes have enough physical CPUs to
13164 # satisfy the requested CPU mask
13165 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13166 max_requested_cpu + 1, instance.hypervisor)
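# Hedged worked example of the CPU mask consistency check above (mask syntax
# assumed to be colon-separated per-vCPU ranges, as parsed by
# utils.ParseMultiCpuMask):
#
#   cpu_mask = "0-1:2-3:4"  ->  cpu_list = [[0, 1], [2, 3], [4]]
#
# With three entries, BE_VCPUS must equal 3, and since the highest CPU
# number requested is 4, every node must expose at least 5 physical CPUs.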
13168 # osparams processing
13169 if self.op.osparams:
13170 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13171 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13172 self.os_inst = i_osdict # the new dict (without defaults)
13178 #TODO(dynmem): do the appropriate check involving MINMEM
13179 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13180 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13181 mem_check_list = [pnode]
13182 if be_new[constants.BE_AUTO_BALANCE]:
13183 # either we changed auto_balance to yes or it was from before
13184 mem_check_list.extend(instance.secondary_nodes)
13185 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13186 instance.hypervisor)
13187 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13188 [instance.hypervisor])
13189 pninfo = nodeinfo[pnode]
13190 msg = pninfo.fail_msg
13192 # Assume the primary node is unreachable and go ahead
13193 self.warn.append("Can't get info from primary node %s: %s" %
13196 (_, _, (pnhvinfo, )) = pninfo.payload
13197 if not isinstance(pnhvinfo.get("memory_free", None), int):
13198 self.warn.append("Node data from primary node %s doesn't contain"
13199 " free memory information" % pnode)
13200 elif instance_info.fail_msg:
13201 self.warn.append("Can't get instance runtime information: %s" %
13202 instance_info.fail_msg)
13204 if instance_info.payload:
13205 current_mem = int(instance_info.payload["memory"])
13207 # Assume instance not running
13208 # (there is a slight race condition here, but it's not very
13209 # probable, and we have no other way to check)
13210 # TODO: Describe race condition
13212 #TODO(dynmem): do the appropriate check involving MINMEM
13213 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13214 pnhvinfo["memory_free"])
13216 raise errors.OpPrereqError("This change will prevent the instance"
13217 " from starting, due to %d MB of memory"
13218 " missing on its primary node" %
13219 miss_mem, errors.ECODE_NORES)
13221 if be_new[constants.BE_AUTO_BALANCE]:
13222 for node, nres in nodeinfo.items():
13223 if node not in instance.secondary_nodes:
13225 nres.Raise("Can't get info from secondary node %s" % node,
13226 prereq=True, ecode=errors.ECODE_STATE)
13227 (_, _, (nhvinfo, )) = nres.payload
13228 if not isinstance(nhvinfo.get("memory_free", None), int):
13229 raise errors.OpPrereqError("Secondary node %s didn't return free"
13230 " memory information" % node,
13231 errors.ECODE_STATE)
13232 #TODO(dynmem): do the appropriate check involving MINMEM
13233 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13234 raise errors.OpPrereqError("This change will prevent the instance"
13235 " from failover to its secondary node"
13236 " %s, due to not enough memory" % node,
13237 errors.ECODE_STATE)
13239 if self.op.runtime_mem:
13240 remote_info = self.rpc.call_instance_info(instance.primary_node,
13242 instance.hypervisor)
13243 remote_info.Raise("Error checking node %s" % instance.primary_node)
13244 if not remote_info.payload: # not running already
13245 raise errors.OpPrereqError("Instance %s is not running" %
13246 instance.name, errors.ECODE_STATE)
13248 current_memory = remote_info.payload["memory"]
13249 if (not self.op.force and
13250 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13251 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13252 raise errors.OpPrereqError("Instance %s must have between %d"
13253 " and %d MB of memory unless --force is"
13256 self.be_proposed[constants.BE_MINMEM],
13257 self.be_proposed[constants.BE_MAXMEM]),
13258 errors.ECODE_INVAL)
13260 delta = self.op.runtime_mem - current_memory
13262 _CheckNodeFreeMemory(self, instance.primary_node,
13263 "ballooning memory for instance %s" %
13264 instance.name, delta, instance.hypervisor)
13266 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13267 raise errors.OpPrereqError("Disk operations not supported for"
13268 " diskless instances", errors.ECODE_INVAL)
13270 def _PrepareNicCreate(_, params, private):
13271 self._PrepareNicModification(params, private, None, None,
13272 {}, cluster, pnode)
13273 return (None, None)
13275 def _PrepareNicMod(_, nic, params, private):
13276 self._PrepareNicModification(params, private, nic.ip, nic.network,
13277 nic.nicparams, cluster, pnode)
13280 def _PrepareNicRemove(_, params, __):
13282 net = params.network
13283 if net is not None and ip is not None:
13284 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13286 # Verify NIC changes (operating on copy)
13287 nics = instance.nics[:]
13288 ApplyContainerMods("NIC", nics, None, self.nicmod,
13289 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13290 if len(nics) > constants.MAX_NICS:
13291 raise errors.OpPrereqError("Instance has too many network interfaces"
13292 " (%d), cannot add more" % constants.MAX_NICS,
13293 errors.ECODE_STATE)
13295 # Verify disk changes (operating on a copy)
13296 disks = instance.disks[:]
13297 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13298 if len(disks) > constants.MAX_DISKS:
13299 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13300 " more" % constants.MAX_DISKS,
13301 errors.ECODE_STATE)
13302 disk_sizes = [disk.size for disk in instance.disks]
13303 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13305 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13306 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13308 if self.op.offline is not None:
13309 if self.op.offline:
13310 msg = "can't change to offline"
13312 msg = "can't change to online"
13313 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13315 # Pre-compute NIC changes (necessary to use result in hooks)
13316 self._nic_chgdesc = []
13318 # Operate on copies as this is still in prereq
13319 nics = [nic.Copy() for nic in instance.nics]
13320 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13321 self._CreateNewNic, self._ApplyNicMods, None)
13322 self._new_nics = nics
13323 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13325 self._new_nics = None
13326 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13328 if not self.op.ignore_ipolicy:
13329 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13332 # Fill ispec with backend parameters
13333 ispec[constants.ISPEC_SPINDLE_USE] = \
13334 self.be_new.get(constants.BE_SPINDLE_USE, None)
13335 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13338 # Copy ispec to verify parameters with min/max values separately
13339 ispec_max = ispec.copy()
13340 ispec_max[constants.ISPEC_MEM_SIZE] = \
13341 self.be_new.get(constants.BE_MAXMEM, None)
13342 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13343 ispec_min = ispec.copy()
13344 ispec_min[constants.ISPEC_MEM_SIZE] = \
13345 self.be_new.get(constants.BE_MINMEM, None)
13346 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13348 if (res_max or res_min):
13349 res = set(res_max + res_min)
13350 # FIXME: Improve error message by including information about whether
13351 # the upper or lower limit of the parameter fails the ipolicy.
13352 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13353 (group_info, group_info.name, utils.CommaJoin(res)))
13354 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
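# Hedged illustration of the min/max split above (numbers invented): with
# BE_MINMEM=512 and BE_MAXMEM=2048 the ispec is validated twice, once with
# ISPEC_MEM_SIZE=2048 and once with ISPEC_MEM_SIZE=512, and the union of the
# two violation lists is what ends up in the error message.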
13356 def _ConvertPlainToDrbd(self, feedback_fn):
13357 """Converts an instance from plain to drbd.
13360 feedback_fn("Converting template to drbd")
13361 instance = self.instance
13362 pnode = instance.primary_node
13363 snode = self.op.remote_node
13365 assert instance.disk_template == constants.DT_PLAIN
13367 # create a fake disk info for _GenerateDiskTemplate
13368 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13369 constants.IDISK_VG: d.logical_id[0]}
13370 for d in instance.disks]
13371 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13372 instance.name, pnode, [snode],
13373 disk_info, None, None, 0, feedback_fn,
13375 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13377 info = _GetInstanceInfoText(instance)
13378 feedback_fn("Creating additional volumes...")
13379 # first, create the missing data and meta devices
13380 for disk in anno_disks:
13381 # unfortunately this is... not too nice
13382 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13384 for child in disk.children:
13385 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13386 # at this stage, all new LVs have been created, we can rename the
13388 feedback_fn("Renaming original volumes...")
13389 rename_list = [(o, n.children[0].logical_id)
13390 for (o, n) in zip(instance.disks, new_disks)]
13391 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13392 result.Raise("Failed to rename original LVs")
13394 feedback_fn("Initializing DRBD devices...")
13395 # all child devices are in place, we can now create the DRBD devices
13396 for disk in anno_disks:
13397 for node in [pnode, snode]:
13398 f_create = node == pnode
13399 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13401 # at this point, the instance has been modified
13402 instance.disk_template = constants.DT_DRBD8
13403 instance.disks = new_disks
13404 self.cfg.Update(instance, feedback_fn)
13406 # Release node locks while waiting for sync
13407 _ReleaseLocks(self, locking.LEVEL_NODE)
13409 # disks are created, waiting for sync
13410 disk_abort = not _WaitForSync(self, instance,
13411 oneshot=not self.op.wait_for_sync)
13413 raise errors.OpExecError("There are some degraded disks for"
13414 " this instance, please cleanup manually")
13416 # Node resource locks will be released by caller
13418 def _ConvertDrbdToPlain(self, feedback_fn):
13419 """Converts an instance from drbd to plain.
13422 instance = self.instance
13424 assert len(instance.secondary_nodes) == 1
13425 assert instance.disk_template == constants.DT_DRBD8
13427 pnode = instance.primary_node
13428 snode = instance.secondary_nodes[0]
13429 feedback_fn("Converting template to plain")
13431 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13432 new_disks = [d.children[0] for d in instance.disks]
13434 # copy over size and mode
13435 for parent, child in zip(old_disks, new_disks):
13436 child.size = parent.size
13437 child.mode = parent.mode
13439 # this is a DRBD disk, return its port to the pool
13440 # NOTE: this must be done right before the call to cfg.Update!
13441 for disk in old_disks:
13442 tcp_port = disk.logical_id[2]
13443 self.cfg.AddTcpUdpPort(tcp_port)
13445 # update instance structure
13446 instance.disks = new_disks
13447 instance.disk_template = constants.DT_PLAIN
13448 self.cfg.Update(instance, feedback_fn)
13450 # Release locks in case removing disks takes a while
13451 _ReleaseLocks(self, locking.LEVEL_NODE)
13453 feedback_fn("Removing volumes on the secondary node...")
13454 for disk in old_disks:
13455 self.cfg.SetDiskID(disk, snode)
13456 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13458 self.LogWarning("Could not remove block device %s on node %s,"
13459 " continuing anyway: %s", disk.iv_name, snode, msg)
13461 feedback_fn("Removing unneeded volumes on the primary node...")
13462 for idx, disk in enumerate(old_disks):
13463 meta = disk.children[1]
13464 self.cfg.SetDiskID(meta, pnode)
13465 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13467 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13468 " continuing anyway: %s", idx, pnode, msg)
13470 def _CreateNewDisk(self, idx, params, _):
13471 """Creates a new disk.
13474 instance = self.instance
13477 if instance.disk_template in constants.DTS_FILEBASED:
13478 (file_driver, file_path) = instance.disks[0].logical_id
13479 file_path = os.path.dirname(file_path)
13481 file_driver = file_path = None
13484 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13485 instance.primary_node, instance.secondary_nodes,
13486 [params], file_path, file_driver, idx,
13487 self.Log, self.diskparams)[0]
13489 info = _GetInstanceInfoText(instance)
13491 logging.info("Creating volume %s for instance %s",
13492 disk.iv_name, instance.name)
13493 # Note: this needs to be kept in sync with _CreateDisks
13495 for node in instance.all_nodes:
13496 f_create = (node == instance.primary_node)
13498 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13499 except errors.OpExecError, err:
13500 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13501 disk.iv_name, disk, node, err)
13504 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13508 def _ModifyDisk(idx, disk, params, _):
13509 """Modifies a disk.
13512 disk.mode = params[constants.IDISK_MODE]
13515 ("disk.mode/%d" % idx, disk.mode),
13518 def _RemoveDisk(self, idx, root, _):
13522 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13523 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13524 self.cfg.SetDiskID(disk, node)
13525 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13527 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13528 " continuing anyway", idx, node, msg)
13530 # if this is a DRBD disk, return its port to the pool
13531 if root.dev_type in constants.LDS_DRBD:
13532 self.cfg.AddTcpUdpPort(root.logical_id[2])
13535 def _CreateNewNic(idx, params, private):
13536 """Creates data structure for a new network interface.
13539 mac = params[constants.INIC_MAC]
13540 ip = params.get(constants.INIC_IP, None)
13541 net = params.get(constants.INIC_NETWORK, None)
13542 #TODO: not private.filled?? can a nic have no nicparams??
13543 nicparams = private.filled
13545 return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
13547 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13548 (mac, ip, private.filled[constants.NIC_MODE],
13549 private.filled[constants.NIC_LINK],
13554 def _ApplyNicMods(idx, nic, params, private):
13555 """Modifies a network interface.
13560 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13562 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13563 setattr(nic, key, params[key])
13566 nic.nicparams = private.filled
13568 for (key, val) in nic.nicparams.items():
13569 changes.append(("nic.%s/%d" % (key, idx), val))
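# Hedged example of the change descriptions produced above for NIC index 0
# (values invented): [("nic.mac/0", "aa:00:00:11:22:33"),
# ("nic.ip/0", "192.0.2.10"), ("nic.mode/0", "bridged"), ("nic.link/0", "br0")]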
13573 def Exec(self, feedback_fn):
13574 """Modifies an instance.
13576 All parameters take effect only at the next restart of the instance.
13579 # Process here the warnings from CheckPrereq, as we don't have a
13580 # feedback_fn there.
13581 # TODO: Replace with self.LogWarning
13582 for warn in self.warn:
13583 feedback_fn("WARNING: %s" % warn)
13585 assert ((self.op.disk_template is None) ^
13586 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13587 "Not owning any node resource locks"
13590 instance = self.instance
13593 if self.op.runtime_mem:
13594 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13596 self.op.runtime_mem)
13597 rpcres.Raise("Cannot modify instance runtime memory")
13598 result.append(("runtime_memory", self.op.runtime_mem))
13600 # Apply disk changes
13601 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13602 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13603 _UpdateIvNames(0, instance.disks)
13605 if self.op.disk_template:
13607 check_nodes = set(instance.all_nodes)
13608 if self.op.remote_node:
13609 check_nodes.add(self.op.remote_node)
13610 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13611 owned = self.owned_locks(level)
13612 assert not (check_nodes - owned), \
13613 ("Not owning the correct locks, owning %r, expected at least %r" %
13614 (owned, check_nodes))
13616 r_shut = _ShutdownInstanceDisks(self, instance)
13618 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13619 " proceed with disk template conversion")
13620 mode = (instance.disk_template, self.op.disk_template)
13622 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13624 self.cfg.ReleaseDRBDMinors(instance.name)
13626 result.append(("disk_template", self.op.disk_template))
13628 assert instance.disk_template == self.op.disk_template, \
13629 ("Expected disk template '%s', found '%s'" %
13630 (self.op.disk_template, instance.disk_template))
13632 # Release node and resource locks if there are any (they might already have
13633 # been released during disk conversion)
13634 _ReleaseLocks(self, locking.LEVEL_NODE)
13635 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13637 # Apply NIC changes
13638 if self._new_nics is not None:
13639 instance.nics = self._new_nics
13640 result.extend(self._nic_chgdesc)
13643 if self.op.hvparams:
13644 instance.hvparams = self.hv_inst
13645 for key, val in self.op.hvparams.iteritems():
13646 result.append(("hv/%s" % key, val))
13649 if self.op.beparams:
13650 instance.beparams = self.be_inst
13651 for key, val in self.op.beparams.iteritems():
13652 result.append(("be/%s" % key, val))
13655 if self.op.os_name:
13656 instance.os = self.op.os_name
13659 if self.op.osparams:
13660 instance.osparams = self.os_inst
13661 for key, val in self.op.osparams.iteritems():
13662 result.append(("os/%s" % key, val))
13664 if self.op.offline is None:
13667 elif self.op.offline:
13668 # Mark instance as offline
13669 self.cfg.MarkInstanceOffline(instance.name)
13670 result.append(("admin_state", constants.ADMINST_OFFLINE))
13672 # Mark instance as online, but stopped
13673 self.cfg.MarkInstanceDown(instance.name)
13674 result.append(("admin_state", constants.ADMINST_DOWN))
13676 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13678 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13679 self.owned_locks(locking.LEVEL_NODE)), \
13680 "All node locks should have been released by now"
13684 _DISK_CONVERSIONS = {
13685 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13686 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13690 class LUInstanceChangeGroup(LogicalUnit):
13691 HPATH = "instance-change-group"
13692 HTYPE = constants.HTYPE_INSTANCE
13695 def ExpandNames(self):
13696 self.share_locks = _ShareAll()
13697 self.needed_locks = {
13698 locking.LEVEL_NODEGROUP: [],
13699 locking.LEVEL_NODE: [],
13702 self._ExpandAndLockInstance()
13704 if self.op.target_groups:
13705 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13706 self.op.target_groups)
13708 self.req_target_uuids = None
13710 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13712 def DeclareLocks(self, level):
13713 if level == locking.LEVEL_NODEGROUP:
13714 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13716 if self.req_target_uuids:
13717 lock_groups = set(self.req_target_uuids)
13719 # Lock all groups used by instance optimistically; this requires going
13720 # via the node before it's locked, requiring verification later on
13721 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13722 lock_groups.update(instance_groups)
13724 # No target groups, need to lock all of them
13725 lock_groups = locking.ALL_SET
13727 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13729 elif level == locking.LEVEL_NODE:
13730 if self.req_target_uuids:
13731 # Lock all nodes used by instances
13732 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13733 self._LockInstancesNodes()
13735 # Lock all nodes in all potential target groups
13736 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13737 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13738 member_nodes = [node_name
13739 for group in lock_groups
13740 for node_name in self.cfg.GetNodeGroup(group).members]
13741 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13743 # Lock all nodes as all groups are potential targets
13744 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13746 def CheckPrereq(self):
13747 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13748 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13749 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13751 assert (self.req_target_uuids is None or
13752 owned_groups.issuperset(self.req_target_uuids))
13753 assert owned_instances == set([self.op.instance_name])
13755 # Get instance information
13756 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13758 # Check if node groups for locked instance are still correct
13759 assert owned_nodes.issuperset(self.instance.all_nodes), \
13760 ("Instance %s's nodes changed while we kept the lock" %
13761 self.op.instance_name)
13763 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13766 if self.req_target_uuids:
13767 # User requested specific target groups
13768 self.target_uuids = frozenset(self.req_target_uuids)
13770 # All groups except those used by the instance are potential targets
13771 self.target_uuids = owned_groups - inst_groups
13773 conflicting_groups = self.target_uuids & inst_groups
13774 if conflicting_groups:
13775 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13776 " used by the instance '%s'" %
13777 (utils.CommaJoin(conflicting_groups),
13778 self.op.instance_name),
13779 errors.ECODE_INVAL)
13781 if not self.target_uuids:
13782 raise errors.OpPrereqError("There are no possible target groups",
13783 errors.ECODE_INVAL)
13785 def BuildHooksEnv(self):
13786 """Build hooks env.
13789 assert self.target_uuids
13792 "TARGET_GROUPS": " ".join(self.target_uuids),
13795 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13799 def BuildHooksNodes(self):
13800 """Build hooks nodes.
13803 mn = self.cfg.GetMasterNode()
13804 return ([mn], [mn])
13806 def Exec(self, feedback_fn):
13807 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13809 assert instances == [self.op.instance_name], "Instance not locked"
13811 req = iallocator.IAReqGroupChange(instances=instances,
13812 target_groups=list(self.target_uuids))
13813 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13815 ial.Run(self.op.iallocator)
13817 if not ial.success:
13818 raise errors.OpPrereqError("Can't compute solution for changing group of"
13819 " instance '%s' using iallocator '%s': %s" %
13820 (self.op.instance_name, self.op.iallocator,
13821 ial.info), errors.ECODE_NORES)
13823 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13825 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13826 " instance '%s'", len(jobs), self.op.instance_name)
13828 return ResultWithJobs(jobs)
13831 class LUBackupQuery(NoHooksLU):
13832 """Query the exports list
13837 def CheckArguments(self):
13838 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13839 ["node", "export"], self.op.use_locking)
13841 def ExpandNames(self):
13842 self.expq.ExpandNames(self)
13844 def DeclareLocks(self, level):
13845 self.expq.DeclareLocks(self, level)
13847 def Exec(self, feedback_fn):
13850 for (node, expname) in self.expq.OldStyleQuery(self):
13851 if expname is None:
13852 result[node] = False
13854 result.setdefault(node, []).append(expname)
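# Hedged example of the old-style result built above (node and export names
# invented): a node that failed to answer maps to False, every other node
# maps to the list of export names found on it, e.g.
# {"node1": False, "node2": ["instance1.export"]}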
13859 class _ExportQuery(_QueryBase):
13860 FIELDS = query.EXPORT_FIELDS
13862 #: The node name is not a unique key for this query
13863 SORT_FIELD = "node"
13865 def ExpandNames(self, lu):
13866 lu.needed_locks = {}
13868 # The following variables interact with _QueryBase._GetNames
13870 self.wanted = _GetWantedNodes(lu, self.names)
13872 self.wanted = locking.ALL_SET
13874 self.do_locking = self.use_locking
13876 if self.do_locking:
13877 lu.share_locks = _ShareAll()
13878 lu.needed_locks = {
13879 locking.LEVEL_NODE: self.wanted,
13882 def DeclareLocks(self, lu, level):
13885 def _GetQueryData(self, lu):
13886 """Computes the list of nodes and their attributes.
13889 # Locking is not used
13891 assert not (compat.any(lu.glm.is_owned(level)
13892 for level in locking.LEVELS
13893 if level != locking.LEVEL_CLUSTER) or
13894 self.do_locking or self.use_locking)
13896 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13900 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13902 result.append((node, None))
13904 result.extend((node, expname) for expname in nres.payload)
13909 class LUBackupPrepare(NoHooksLU):
13910 """Prepares an instance for an export and returns useful information.
13915 def ExpandNames(self):
13916 self._ExpandAndLockInstance()
13918 def CheckPrereq(self):
13919 """Check prerequisites.
13922 instance_name = self.op.instance_name
13924 self.instance = self.cfg.GetInstanceInfo(instance_name)
13925 assert self.instance is not None, \
13926 "Cannot retrieve locked instance %s" % self.op.instance_name
13927 _CheckNodeOnline(self, self.instance.primary_node)
13929 self._cds = _GetClusterDomainSecret()
13931 def Exec(self, feedback_fn):
13932 """Prepares an instance for an export.
13935 instance = self.instance
13937 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13938 salt = utils.GenerateSecret(8)
13940 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13941 result = self.rpc.call_x509_cert_create(instance.primary_node,
13942 constants.RIE_CERT_VALIDITY)
13943 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13945 (name, cert_pem) = result.payload
13947 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13951 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13952 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13954 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13960 class LUBackupExport(LogicalUnit):
13961 """Export an instance to an image in the cluster.
13964 HPATH = "instance-export"
13965 HTYPE = constants.HTYPE_INSTANCE
13968 def CheckArguments(self):
13969 """Check the arguments.
13972 self.x509_key_name = self.op.x509_key_name
13973 self.dest_x509_ca_pem = self.op.destination_x509_ca
13975 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13976 if not self.x509_key_name:
13977 raise errors.OpPrereqError("Missing X509 key name for encryption",
13978 errors.ECODE_INVAL)
13980 if not self.dest_x509_ca_pem:
13981 raise errors.OpPrereqError("Missing destination X509 CA",
13982 errors.ECODE_INVAL)
13984 def ExpandNames(self):
13985 self._ExpandAndLockInstance()
13987 # Lock all nodes for local exports
13988 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13989 # FIXME: lock only instance primary and destination node
13991 # Sad but true, for now we have to lock all nodes, as we don't know where
13992 # the previous export might be, and in this LU we search for it and
13993 # remove it from its current node. In the future we could fix this by:
13994 # - making a tasklet to search (share-lock all), then create the
13995 # new one, then one to remove, after
13996 # - removing the removal operation altogether
13997 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13999 def DeclareLocks(self, level):
14000 """Last minute lock declaration."""
14001 # All nodes are locked anyway, so nothing to do here.
14003 def BuildHooksEnv(self):
14004 """Build hooks env.
14006 This will run on the master, primary node and target node.
14010 "EXPORT_MODE": self.op.mode,
14011 "EXPORT_NODE": self.op.target_node,
14012 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14013 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14014 # TODO: Generic function for boolean env variables
14015 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14018 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14022 def BuildHooksNodes(self):
14023 """Build hooks nodes.
14026 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14028 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14029 nl.append(self.op.target_node)
14033 def CheckPrereq(self):
14034 """Check prerequisites.
14036 This checks that the instance and node names are valid.
14039 instance_name = self.op.instance_name
14041 self.instance = self.cfg.GetInstanceInfo(instance_name)
14042 assert self.instance is not None, \
14043 "Cannot retrieve locked instance %s" % self.op.instance_name
14044 _CheckNodeOnline(self, self.instance.primary_node)
14046 if (self.op.remove_instance and
14047 self.instance.admin_state == constants.ADMINST_UP and
14048 not self.op.shutdown):
14049 raise errors.OpPrereqError("Cannot remove instance without shutting it"
14050 " down first", errors.ECODE_STATE)
14052 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14053 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14054 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14055 assert self.dst_node is not None
14057 _CheckNodeOnline(self, self.dst_node.name)
14058 _CheckNodeNotDrained(self, self.dst_node.name)
14061 self.dest_disk_info = None
14062 self.dest_x509_ca = None
14064 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14065 self.dst_node = None
14067 if len(self.op.target_node) != len(self.instance.disks):
14068 raise errors.OpPrereqError(("Received destination information for %s"
14069 " disks, but instance %s has %s disks") %
14070 (len(self.op.target_node), instance_name,
14071 len(self.instance.disks)),
14072 errors.ECODE_INVAL)
14074 cds = _GetClusterDomainSecret()
14076 # Check X509 key name
14078 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14079 except (TypeError, ValueError), err:
14080 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14081 errors.ECODE_INVAL)
14083 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14084 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14085 errors.ECODE_INVAL)
14087 # Load and verify CA
14089 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14090 except OpenSSL.crypto.Error, err:
14091 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14092 (err, ), errors.ECODE_INVAL)
14094 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14095 if errcode is not None:
14096 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14097 (msg, ), errors.ECODE_INVAL)
14099 self.dest_x509_ca = cert
14101 # Verify target information
14103 for idx, disk_data in enumerate(self.op.target_node):
14105 (host, port, magic) = \
14106 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14107 except errors.GenericError, err:
14108 raise errors.OpPrereqError("Target info for disk %s: %s" %
14109 (idx, err), errors.ECODE_INVAL)
14111 disk_info.append((host, port, magic))
14113 assert len(disk_info) == len(self.op.target_node)
14114 self.dest_disk_info = disk_info
14117 raise errors.ProgrammerError("Unhandled export mode %r" %
14120 # instance disk type verification
14121 # TODO: Implement export support for file-based disks
14122 for disk in self.instance.disks:
14123 if disk.dev_type == constants.LD_FILE:
14124 raise errors.OpPrereqError("Export not supported for instances with"
14125 " file-based disks", errors.ECODE_INVAL)
14127 def _CleanupExports(self, feedback_fn):
14128 """Removes exports of current instance from all other nodes.
14130 If an instance in a cluster with nodes A..D was exported to node C, its
14131 exports will be removed from the nodes A, B and D.
14134 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14136 nodelist = self.cfg.GetNodeList()
14137 nodelist.remove(self.dst_node.name)
14139 # on one-node clusters nodelist will be empty after the removal
14140 # if we proceed the backup would be removed because OpBackupQuery
14141 # substitutes an empty list with the full cluster node list.
14142 iname = self.instance.name
14144 feedback_fn("Removing old exports for instance %s" % iname)
14145 exportlist = self.rpc.call_export_list(nodelist)
14146 for node in exportlist:
14147 if exportlist[node].fail_msg:
14149 if iname in exportlist[node].payload:
14150 msg = self.rpc.call_export_remove(node, iname).fail_msg
14152 self.LogWarning("Could not remove older export for instance %s"
14153 " on node %s: %s", iname, node, msg)
14155 def Exec(self, feedback_fn):
14156 """Export an instance to an image in the cluster.
14159 assert self.op.mode in constants.EXPORT_MODES
14161 instance = self.instance
14162 src_node = instance.primary_node
14164 if self.op.shutdown:
14165 # shutdown the instance, but not the disks
14166 feedback_fn("Shutting down instance %s" % instance.name)
14167 result = self.rpc.call_instance_shutdown(src_node, instance,
14168 self.op.shutdown_timeout)
14169 # TODO: Maybe ignore failures if ignore_remove_failures is set
14170 result.Raise("Could not shutdown instance %s on"
14171 " node %s" % (instance.name, src_node))
14173 # set the disks ID correctly since call_instance_start needs the
14174 # correct drbd minor to create the symlinks
14175 for disk in instance.disks:
14176 self.cfg.SetDiskID(disk, src_node)
14178 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14181 # Activate the instance disks if we're exporting a stopped instance
14182 feedback_fn("Activating disks for %s" % instance.name)
14183 _StartInstanceDisks(self, instance, None)
14186 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14189 helper.CreateSnapshots()
14191 if (self.op.shutdown and
14192 instance.admin_state == constants.ADMINST_UP and
14193 not self.op.remove_instance):
14194 assert not activate_disks
14195 feedback_fn("Starting instance %s" % instance.name)
14196 result = self.rpc.call_instance_start(src_node,
14197 (instance, None, None), False)
14198 msg = result.fail_msg
14200 feedback_fn("Failed to start instance: %s" % msg)
14201 _ShutdownInstanceDisks(self, instance)
14202 raise errors.OpExecError("Could not start instance: %s" % msg)
14204 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14205 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14206 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14207 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14208 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14210 (key_name, _, _) = self.x509_key_name
14213 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14216 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14217 key_name, dest_ca_pem,
14222 # Check for backwards compatibility
14223 assert len(dresults) == len(instance.disks)
14224 assert compat.all(isinstance(i, bool) for i in dresults), \
14225 "Not all results are boolean: %r" % dresults
14229 feedback_fn("Deactivating disks for %s" % instance.name)
14230 _ShutdownInstanceDisks(self, instance)
14232 if not (compat.all(dresults) and fin_resu):
14235 failures.append("export finalization")
14236 if not compat.all(dresults):
14237 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14239 failures.append("disk export: disk(s) %s" % fdsk)
14241 raise errors.OpExecError("Export failed, errors in %s" %
14242 utils.CommaJoin(failures))
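# Hedged example of the failure summary above (invented values): with
# fin_resu=True and dresults=[True, False], only disk 1 failed, so the
# resulting message would report "disk export: disk(s) 1".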
14244 # At this point, the export was successful, we can cleanup/finish
14246 # Remove instance if requested
14247 if self.op.remove_instance:
14248 feedback_fn("Removing instance %s" % instance.name)
14249 _RemoveInstance(self, feedback_fn, instance,
14250 self.op.ignore_remove_failures)
14252 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14253 self._CleanupExports(feedback_fn)
14255 return fin_resu, dresults
14258 class LUBackupRemove(NoHooksLU):
14259 """Remove exports related to the named instance.
14264 def ExpandNames(self):
14265 self.needed_locks = {}
14266 # We need all nodes to be locked in order for RemoveExport to work, but we
14267 # don't need to lock the instance itself, as nothing will happen to it (and
14268 # we can remove exports also for a removed instance)
14269 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14271 def Exec(self, feedback_fn):
14272 """Remove any export.
14275 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14276 # If the instance was not found we'll try with the name that was passed in.
14277 # This will only work if it was an FQDN, though.
14279 if not instance_name:
14281 instance_name = self.op.instance_name
14283 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14284 exportlist = self.rpc.call_export_list(locked_nodes)
14286 for node in exportlist:
14287 msg = exportlist[node].fail_msg
14289 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14291 if instance_name in exportlist[node].payload:
14293 result = self.rpc.call_export_remove(node, instance_name)
14294 msg = result.fail_msg
14296 logging.error("Could not remove export for instance %s"
14297 " on node %s: %s", instance_name, node, msg)
14299 if fqdn_warn and not found:
14300 feedback_fn("Export not found. If trying to remove an export belonging"
14301 " to a deleted instance please use its Fully Qualified Domain Name.")
14305 class LUGroupAdd(LogicalUnit):
14306 """Logical unit for creating node groups.
14309 HPATH = "group-add"
14310 HTYPE = constants.HTYPE_GROUP
14313 def ExpandNames(self):
14314 # We need the new group's UUID here so that we can create and acquire the
14315 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14316 # that it should not check whether the UUID exists in the configuration.
14317 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14318 self.needed_locks = {}
14319 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14321 def CheckPrereq(self):
14322 """Check prerequisites.
14324 This checks that the given group name is not an existing node group
14329 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14330 except errors.OpPrereqError:
14333 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14334 " node group (UUID: %s)" %
14335 (self.op.group_name, existing_uuid),
14336 errors.ECODE_EXISTS)
14338 if self.op.ndparams:
14339 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14341 if self.op.hv_state:
14342 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14344 self.new_hv_state = None
14346 if self.op.disk_state:
14347 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14349 self.new_disk_state = None
14351 if self.op.diskparams:
14352 for templ in constants.DISK_TEMPLATES:
14353 if templ in self.op.diskparams:
14354 utils.ForceDictType(self.op.diskparams[templ],
14355 constants.DISK_DT_TYPES)
14356 self.new_diskparams = self.op.diskparams
14358 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14359 except errors.OpPrereqError, err:
14360 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14361 errors.ECODE_INVAL)
14363 self.new_diskparams = {}
14365 if self.op.ipolicy:
14366 cluster = self.cfg.GetClusterInfo()
14367 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14369 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14370 except errors.ConfigurationError, err:
14371 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14372 errors.ECODE_INVAL)
14374 def BuildHooksEnv(self):
14375 """Build hooks env.
14379 "GROUP_NAME": self.op.group_name,
14382 def BuildHooksNodes(self):
14383 """Build hooks nodes.
14386 mn = self.cfg.GetMasterNode()
14387 return ([mn], [mn])
14389 def Exec(self, feedback_fn):
14390 """Add the node group to the cluster.
14393 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14394 uuid=self.group_uuid,
14395 alloc_policy=self.op.alloc_policy,
14396 ndparams=self.op.ndparams,
14397 diskparams=self.new_diskparams,
14398 ipolicy=self.op.ipolicy,
14399 hv_state_static=self.new_hv_state,
14400 disk_state_static=self.new_disk_state)
14402 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14403 del self.remove_locks[locking.LEVEL_NODEGROUP]
14406 class LUGroupAssignNodes(NoHooksLU):
14407 """Logical unit for assigning nodes to groups.
14412 def ExpandNames(self):
14413 # These raise errors.OpPrereqError on their own:
14414 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14415 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14417 # We want to lock all the affected nodes and groups. We have readily
14418 # available the list of nodes, and the *destination* group. To gather the
14419 # list of "source" groups, we need to fetch node information later on.
14420 self.needed_locks = {
14421 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14422 locking.LEVEL_NODE: self.op.nodes,
14425 def DeclareLocks(self, level):
14426 if level == locking.LEVEL_NODEGROUP:
14427 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14429 # Try to get all affected nodes' groups without having the group or node
14430 # lock yet. Needs verification later in the code flow.
14431 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14433 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14435 def CheckPrereq(self):
14436 """Check prerequisites.
14439 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14440 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14441 frozenset(self.op.nodes))
14443 expected_locks = (set([self.group_uuid]) |
14444 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14445 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14446 if actual_locks != expected_locks:
14447 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14448 " current groups are '%s', used to be '%s'" %
14449 (utils.CommaJoin(expected_locks),
14450 utils.CommaJoin(actual_locks)))
14452 self.node_data = self.cfg.GetAllNodesInfo()
14453 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14454 instance_data = self.cfg.GetAllInstancesInfo()
14456 if self.group is None:
14457 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14458 (self.op.group_name, self.group_uuid))
14460 (new_splits, previous_splits) = \
14461 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14462 for node in self.op.nodes],
14463 self.node_data, instance_data)
14466 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14468 if not self.op.force:
14469 raise errors.OpExecError("The following instances get split by this"
14470 " change and --force was not given: %s" %
14473 self.LogWarning("This operation will split the following instances: %s",
14476 if previous_splits:
14477 self.LogWarning("In addition, these already-split instances continue"
14478 " to be split across groups: %s",
14479 utils.CommaJoin(utils.NiceSort(previous_splits)))
14481 def Exec(self, feedback_fn):
14482 """Assign nodes to a new group.
14485 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14487 self.cfg.AssignGroupNodes(mods)
14490 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14491 """Check for split instances after a node assignment.
14493 This method considers a series of node assignments as an atomic operation,
14494 and returns information about split instances after applying the set of
14497 In particular, it returns information about newly split instances, and about
14498 instances that were already split and remain so after the change.
14500 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14503 @type changes: list of (node_name, new_group_uuid) pairs.
14504 @param changes: list of node assignments to consider.
14505 @param node_data: a dict with data for all nodes
14506 @param instance_data: a dict with all instances to consider
14507 @rtype: a two-tuple
14508 @return: a list of instances that were previously unsplit and would become
14509 split as a consequence of this change, and a list of instances that were
14510 already split and that this change does not fix.
14513 changed_nodes = dict((node, group) for node, group in changes
14514 if node_data[node].group != group)
14516 all_split_instances = set()
14517 previously_split_instances = set()
14519 def InstanceNodes(instance):
14520 return [instance.primary_node] + list(instance.secondary_nodes)
14522 for inst in instance_data.values():
14523 if inst.disk_template not in constants.DTS_INT_MIRROR:
14526 instance_nodes = InstanceNodes(inst)
14528 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14529 previously_split_instances.add(inst.name)
14531 if len(set(changed_nodes.get(node, node_data[node].group)
14532 for node in instance_nodes)) > 1:
14533 all_split_instances.add(inst.name)
14535 return (list(all_split_instances - previously_split_instances),
14536 list(previously_split_instances & all_split_instances))
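# Illustrative sketch (not part of this module): the check above reduces to
# asking whether an instance's nodes would end up in more than one group once
# the pending assignments are applied. With plain dicts standing in for the
# configuration objects (names below are hypothetical):
#
#   def _WouldBeSplit(instance_nodes, node_to_group, pending_changes):
#     """Returns True if the instance's nodes would span several groups."""
#     groups = set(pending_changes.get(node, node_to_group[node])
#                  for node in instance_nodes)
#     return len(groups) > 1
#
#   # inst0 runs on node1 (primary) and node2 (secondary); moving node2 to
#   # "uuid-b" while node1 stays in "uuid-a" splits the instance:
#   assert _WouldBeSplit(["node1", "node2"],
#                        {"node1": "uuid-a", "node2": "uuid-a"},
#                        {"node2": "uuid-b"})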
14539 class _GroupQuery(_QueryBase):
14540 FIELDS = query.GROUP_FIELDS
14542 def ExpandNames(self, lu):
14543 lu.needed_locks = {}
14545 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14546 self._cluster = lu.cfg.GetClusterInfo()
14547 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14550 self.wanted = [name_to_uuid[name]
14551 for name in utils.NiceSort(name_to_uuid.keys())]
14553 # Accept the passed names to be either group names or UUIDs.
14556 all_uuid = frozenset(self._all_groups.keys())
14558 for name in self.names:
14559 if name in all_uuid:
14560 self.wanted.append(name)
14561 elif name in name_to_uuid:
14562 self.wanted.append(name_to_uuid[name])
14564 missing.append(name)
14567 raise errors.OpPrereqError("Some groups do not exist: %s" %
14568 utils.CommaJoin(missing),
14569 errors.ECODE_NOENT)
14571 def DeclareLocks(self, lu, level):
14574 def _GetQueryData(self, lu):
14575 """Computes the list of node groups and their attributes.
14578 do_nodes = query.GQ_NODE in self.requested_data
14579 do_instances = query.GQ_INST in self.requested_data
14581 group_to_nodes = None
14582 group_to_instances = None
14584 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14585 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14586 # latter GetAllInstancesInfo() is not enough, for we have to go through
14587 # instance->node. Hence, we will need to process nodes even if we only need
14588 # instance information.
14589 if do_nodes or do_instances:
14590 all_nodes = lu.cfg.GetAllNodesInfo()
14591 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14594 for node in all_nodes.values():
14595 if node.group in group_to_nodes:
14596 group_to_nodes[node.group].append(node.name)
14597 node_to_group[node.name] = node.group
14600 all_instances = lu.cfg.GetAllInstancesInfo()
14601 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14603 for instance in all_instances.values():
14604 node = instance.primary_node
14605 if node in node_to_group:
14606 group_to_instances[node_to_group[node]].append(instance.name)
14609 # Do not pass on node information if it was not requested.
14610 group_to_nodes = None
14612 return query.GroupQueryData(self._cluster,
14613 [self._all_groups[uuid]
14614 for uuid in self.wanted],
14615 group_to_nodes, group_to_instances,
14616 query.GQ_DISKPARAMS in self.requested_data)
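# Illustrative sketch (not part of this module): the node pass above is what
# makes the instance mapping possible, since instances only record their
# primary node, not their group. With plain dicts as stand-ins (hypothetical
# helper, not used anywhere in Ganeti):
#
#   def _MapGroups(node_to_group, instance_to_pnode, wanted_groups):
#     group_to_nodes = dict((uuid, []) for uuid in wanted_groups)
#     group_to_instances = dict((uuid, []) for uuid in wanted_groups)
#     for node, group in node_to_group.items():
#       if group in group_to_nodes:
#         group_to_nodes[group].append(node)
#     for inst, pnode in instance_to_pnode.items():
#       group = node_to_group.get(pnode)
#       if group in group_to_instances:
#         group_to_instances[group].append(inst)
#     return (group_to_nodes, group_to_instances)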
14619 class LUGroupQuery(NoHooksLU):
14620 """Logical unit for querying node groups.
14625 def CheckArguments(self):
14626 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14627 self.op.output_fields, False)
14629 def ExpandNames(self):
14630 self.gq.ExpandNames(self)
14632 def DeclareLocks(self, level):
14633 self.gq.DeclareLocks(self, level)
14635 def Exec(self, feedback_fn):
14636 return self.gq.OldStyleQuery(self)
14639 class LUGroupSetParams(LogicalUnit):
14640 """Modifies the parameters of a node group.
14643 HPATH = "group-modify"
14644 HTYPE = constants.HTYPE_GROUP
14647 def CheckArguments(self):
14650 self.op.diskparams,
14651 self.op.alloc_policy,
14653 self.op.disk_state,
14657 if all_changes.count(None) == len(all_changes):
14658 raise errors.OpPrereqError("Please pass at least one modification",
14659 errors.ECODE_INVAL)
14661 def ExpandNames(self):
14662 # This raises errors.OpPrereqError on its own:
14663 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14665 self.needed_locks = {
14666 locking.LEVEL_INSTANCE: [],
14667 locking.LEVEL_NODEGROUP: [self.group_uuid],
14670 self.share_locks[locking.LEVEL_INSTANCE] = 1
14672 def DeclareLocks(self, level):
14673 if level == locking.LEVEL_INSTANCE:
14674 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14676 # Lock instances optimistically, needs verification once group lock has
14678 self.needed_locks[locking.LEVEL_INSTANCE] = \
14679 self.cfg.GetNodeGroupInstances(self.group_uuid)
14682 def _UpdateAndVerifyDiskParams(old, new):
14683 """Updates and verifies disk parameters.
14686 new_params = _GetUpdatedParams(old, new)
14687 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14690 def CheckPrereq(self):
14691 """Check prerequisites.
14694 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14696 # Check if locked instances are still correct
14697 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14699 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14700 cluster = self.cfg.GetClusterInfo()
14702 if self.group is None:
14703 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14704 (self.op.group_name, self.group_uuid))
14706 if self.op.ndparams:
14707 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14708 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14709 self.new_ndparams = new_ndparams
14711 if self.op.diskparams:
14712 diskparams = self.group.diskparams
14713 uavdp = self._UpdateAndVerifyDiskParams
14714 # For each disktemplate subdict update and verify the values
14715 new_diskparams = dict((dt,
14716 uavdp(diskparams.get(dt, {}),
14717 self.op.diskparams[dt]))
14718 for dt in constants.DISK_TEMPLATES
14719 if dt in self.op.diskparams)
14720 # Now that we have all subdicts of diskparams ready, let's merge the actual
14721 # dict with all updated subdicts
14722 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14724 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14725 except errors.OpPrereqError, err:
14726 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14727 errors.ECODE_INVAL)
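# Illustrative sketch (not part of this module) of the merge performed above,
# using plain dicts instead of _GetUpdatedParams/objects.FillDict (so value
# removal markers are ignored); the parameter names are only examples:
#
#   current = {"drbd": {"resync-rate": 1024, "metavg": "xenvg"},
#              "plain": {"stripes": 1}}
#   requested = {"drbd": {"resync-rate": 2048}}
#   updated = dict((dt, dict(current.get(dt, {}), **requested[dt]))
#                  for dt in requested)
#   merged = dict(current, **updated)
#   assert merged["drbd"] == {"resync-rate": 2048, "metavg": "xenvg"}
#   assert merged["plain"] == {"stripes": 1}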
14729 if self.op.hv_state:
14730 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14731 self.group.hv_state_static)
14733 if self.op.disk_state:
14734 self.new_disk_state = \
14735 _MergeAndVerifyDiskState(self.op.disk_state,
14736 self.group.disk_state_static)
14738 if self.op.ipolicy:
14739 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14743 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14744 inst_filter = lambda inst: inst.name in owned_instances
14745 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14746 gmi = ganeti.masterd.instance
14748 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14750 new_ipolicy, instances)
14753 self.LogWarning("After the ipolicy change the following instances"
14754 " violate it: %s",
14755 utils.CommaJoin(violations))
14757 def BuildHooksEnv(self):
14758 """Build hooks env.
14762 "GROUP_NAME": self.op.group_name,
14763 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14766 def BuildHooksNodes(self):
14767 """Build hooks nodes.
14770 mn = self.cfg.GetMasterNode()
14771 return ([mn], [mn])
14773 def Exec(self, feedback_fn):
14774 """Modifies the node group.
14779 if self.op.ndparams:
14780 self.group.ndparams = self.new_ndparams
14781 result.append(("ndparams", str(self.group.ndparams)))
14783 if self.op.diskparams:
14784 self.group.diskparams = self.new_diskparams
14785 result.append(("diskparams", str(self.group.diskparams)))
14787 if self.op.alloc_policy:
14788 self.group.alloc_policy = self.op.alloc_policy
14790 if self.op.hv_state:
14791 self.group.hv_state_static = self.new_hv_state
14793 if self.op.disk_state:
14794 self.group.disk_state_static = self.new_disk_state
14796 if self.op.ipolicy:
14797 self.group.ipolicy = self.new_ipolicy
14799 self.cfg.Update(self.group, feedback_fn)
14803 class LUGroupRemove(LogicalUnit):
14804 HPATH = "group-remove"
14805 HTYPE = constants.HTYPE_GROUP
14808 def ExpandNames(self):
14809 # This raises errors.OpPrereqError on its own:
14810 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14811 self.needed_locks = {
14812 locking.LEVEL_NODEGROUP: [self.group_uuid],
14815 def CheckPrereq(self):
14816 """Check prerequisites.
14818 This checks that the given group name exists as a node group, that it is
14819 empty (i.e., contains no nodes), and that it is not the last group of the
14823 # Verify that the group is empty.
14824 group_nodes = [node.name
14825 for node in self.cfg.GetAllNodesInfo().values()
14826 if node.group == self.group_uuid]
14829 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14831 (self.op.group_name,
14832 utils.CommaJoin(utils.NiceSort(group_nodes))),
14833 errors.ECODE_STATE)
14835 # Verify the cluster would not be left group-less.
14836 if len(self.cfg.GetNodeGroupList()) == 1:
14837 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14838 " removed" % self.op.group_name,
14839 errors.ECODE_STATE)
14841 def BuildHooksEnv(self):
14842 """Build hooks env.
14846 "GROUP_NAME": self.op.group_name,
14849 def BuildHooksNodes(self):
14850 """Build hooks nodes.
14853 mn = self.cfg.GetMasterNode()
14854 return ([mn], [mn])
14856 def Exec(self, feedback_fn):
14857 """Remove the node group.
14861 self.cfg.RemoveNodeGroup(self.group_uuid)
14862 except errors.ConfigurationError:
14863 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14864 (self.op.group_name, self.group_uuid))
14866 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14869 class LUGroupRename(LogicalUnit):
14870 HPATH = "group-rename"
14871 HTYPE = constants.HTYPE_GROUP
14874 def ExpandNames(self):
14875 # This raises errors.OpPrereqError on its own:
14876 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14878 self.needed_locks = {
14879 locking.LEVEL_NODEGROUP: [self.group_uuid],
14882 def CheckPrereq(self):
14883 """Check prerequisites.
14885 Ensures that the requested new name is not already in use.
14889 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14890 except errors.OpPrereqError:
14893 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14894 " node group (UUID: %s)" %
14895 (self.op.new_name, new_name_uuid),
14896 errors.ECODE_EXISTS)
14898 def BuildHooksEnv(self):
14899 """Build hooks env.
14903 "OLD_NAME": self.op.group_name,
14904 "NEW_NAME": self.op.new_name,
14907 def BuildHooksNodes(self):
14908 """Build hooks nodes.
14911 mn = self.cfg.GetMasterNode()
14913 all_nodes = self.cfg.GetAllNodesInfo()
14914 all_nodes.pop(mn, None)
14917 run_nodes.extend(node.name for node in all_nodes.values()
14918 if node.group == self.group_uuid)
14920 return (run_nodes, run_nodes)
14922 def Exec(self, feedback_fn):
14923 """Rename the node group.
14926 group = self.cfg.GetNodeGroup(self.group_uuid)
14929 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14930 (self.op.group_name, self.group_uuid))
14932 group.name = self.op.new_name
14933 self.cfg.Update(group, feedback_fn)
14935 return self.op.new_name
14938 class LUGroupEvacuate(LogicalUnit):
14939 HPATH = "group-evacuate"
14940 HTYPE = constants.HTYPE_GROUP
14943 def ExpandNames(self):
14944 # This raises errors.OpPrereqError on its own:
14945 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14947 if self.op.target_groups:
14948 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14949 self.op.target_groups)
14951 self.req_target_uuids = []
14953 if self.group_uuid in self.req_target_uuids:
14954 raise errors.OpPrereqError("Group to be evacuated (%s) cannot be used"
14955 " as a target group (targets are %s)" %
14957 utils.CommaJoin(self.req_target_uuids)),
14958 errors.ECODE_INVAL)
14960 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14962 self.share_locks = _ShareAll()
14963 self.needed_locks = {
14964 locking.LEVEL_INSTANCE: [],
14965 locking.LEVEL_NODEGROUP: [],
14966 locking.LEVEL_NODE: [],
14969 def DeclareLocks(self, level):
14970 if level == locking.LEVEL_INSTANCE:
14971 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14973 # Lock instances optimistically, needs verification once node and group
14974 # locks have been acquired
14975 self.needed_locks[locking.LEVEL_INSTANCE] = \
14976 self.cfg.GetNodeGroupInstances(self.group_uuid)
14978 elif level == locking.LEVEL_NODEGROUP:
14979 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14981 if self.req_target_uuids:
14982 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14984 # Lock all groups used by instances optimistically; this requires going
14985 # via the node before it's locked, requiring verification later on
14986 lock_groups.update(group_uuid
14987 for instance_name in
14988 self.owned_locks(locking.LEVEL_INSTANCE)
14990 self.cfg.GetInstanceNodeGroups(instance_name))
14992 # No target groups, need to lock all of them
14993 lock_groups = locking.ALL_SET
14995 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14997 elif level == locking.LEVEL_NODE:
14998 # This will only lock the nodes in the group to be evacuated which
14999 # contain actual instances
15000 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15001 self._LockInstancesNodes()
15003 # Lock all nodes in group to be evacuated and target groups
15004 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15005 assert self.group_uuid in owned_groups
15006 member_nodes = [node_name
15007 for group in owned_groups
15008 for node_name in self.cfg.GetNodeGroup(group).members]
15009 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15011 def CheckPrereq(self):
15012 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15013 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15014 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15016 assert owned_groups.issuperset(self.req_target_uuids)
15017 assert self.group_uuid in owned_groups
15019 # Check if locked instances are still correct
15020 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15022 # Get instance information
15023 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15025 # Check if node groups for locked instances are still correct
15026 _CheckInstancesNodeGroups(self.cfg, self.instances,
15027 owned_groups, owned_nodes, self.group_uuid)
15029 if self.req_target_uuids:
15030 # User requested specific target groups
15031 self.target_uuids = self.req_target_uuids
15033 # All groups except the one to be evacuated are potential targets
15034 self.target_uuids = [group_uuid for group_uuid in owned_groups
15035 if group_uuid != self.group_uuid]
15037 if not self.target_uuids:
15038 raise errors.OpPrereqError("There are no possible target groups",
15039 errors.ECODE_INVAL)
15041 def BuildHooksEnv(self):
15042 """Build hooks env.
15046 "GROUP_NAME": self.op.group_name,
15047 "TARGET_GROUPS": " ".join(self.target_uuids),
15050 def BuildHooksNodes(self):
15051 """Build hooks nodes.
15054 mn = self.cfg.GetMasterNode()
15056 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15058 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15060 return (run_nodes, run_nodes)
15062 def Exec(self, feedback_fn):
15063 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15065 assert self.group_uuid not in self.target_uuids
15067 req = iallocator.IAReqGroupChange(instances=instances,
15068 target_groups=self.target_uuids)
15069 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15071 ial.Run(self.op.iallocator)
15073 if not ial.success:
15074 raise errors.OpPrereqError("Can't compute group evacuation using"
15075 " iallocator '%s': %s" %
15076 (self.op.iallocator, ial.info),
15077 errors.ECODE_NORES)
15079 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15081 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15082 len(jobs), self.op.group_name)
15084 return ResultWithJobs(jobs)
15087 class TagsLU(NoHooksLU): # pylint: disable=W0223
15088 """Generic tags LU.
15090 This is an abstract class which is the parent of all the other tags LUs.
15093 def ExpandNames(self):
15094 self.group_uuid = None
15095 self.needed_locks = {}
15097 if self.op.kind == constants.TAG_NODE:
15098 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15099 lock_level = locking.LEVEL_NODE
15100 lock_name = self.op.name
15101 elif self.op.kind == constants.TAG_INSTANCE:
15102 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15103 lock_level = locking.LEVEL_INSTANCE
15104 lock_name = self.op.name
15105 elif self.op.kind == constants.TAG_NODEGROUP:
15106 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15107 lock_level = locking.LEVEL_NODEGROUP
15108 lock_name = self.group_uuid
15109 elif self.op.kind == constants.TAG_NETWORK:
15110 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15111 lock_level = locking.LEVEL_NETWORK
15112 lock_name = self.network_uuid
15117 if lock_level and getattr(self.op, "use_locking", True):
15118 self.needed_locks[lock_level] = lock_name
15120 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15121 # not possible to acquire the BGL based on opcode parameters)
15123 def CheckPrereq(self):
15124 """Check prerequisites.
15127 if self.op.kind == constants.TAG_CLUSTER:
15128 self.target = self.cfg.GetClusterInfo()
15129 elif self.op.kind == constants.TAG_NODE:
15130 self.target = self.cfg.GetNodeInfo(self.op.name)
15131 elif self.op.kind == constants.TAG_INSTANCE:
15132 self.target = self.cfg.GetInstanceInfo(self.op.name)
15133 elif self.op.kind == constants.TAG_NODEGROUP:
15134 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15135 elif self.op.kind == constants.TAG_NETWORK:
15136 self.target = self.cfg.GetNetwork(self.network_uuid)
15138 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15139 str(self.op.kind), errors.ECODE_INVAL)
15142 class LUTagsGet(TagsLU):
15143 """Returns the tags of a given object.
15148 def ExpandNames(self):
15149 TagsLU.ExpandNames(self)
15151 # Share locks as this is only a read operation
15152 self.share_locks = _ShareAll()
15154 def Exec(self, feedback_fn):
15155 """Returns the tag list.
15158 return list(self.target.GetTags())
15161 class LUTagsSearch(NoHooksLU):
15162 """Searches the tags for a given pattern.
15167 def ExpandNames(self):
15168 self.needed_locks = {}
15170 def CheckPrereq(self):
15171 """Check prerequisites.
15173 This checks that the passed pattern is valid by compiling it.
15177 self.re = re.compile(self.op.pattern)
15178 except re.error, err:
15179 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15180 (self.op.pattern, err), errors.ECODE_INVAL)
15182 def Exec(self, feedback_fn):
15183 """Returns the tag list.
15187 tgts = [("/cluster", cfg.GetClusterInfo())]
15188 ilist = cfg.GetAllInstancesInfo().values()
15189 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15190 nlist = cfg.GetAllNodesInfo().values()
15191 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15192 tgts.extend(("/nodegroup/%s" % n.name, n)
15193 for n in cfg.GetAllNodeGroupsInfo().values())
15195 for path, target in tgts:
15196 for tag in target.GetTags():
15197 if self.re.search(tag):
15198 results.append((path, tag))
15202 class LUTagsSet(TagsLU):
15203 """Sets a tag on a given object.
15208 def CheckPrereq(self):
15209 """Check prerequisites.
15211 This checks the type and length of the tag name and value.
15214 TagsLU.CheckPrereq(self)
15215 for tag in self.op.tags:
15216 objects.TaggableObject.ValidateTag(tag)
15218 def Exec(self, feedback_fn):
15223 for tag in self.op.tags:
15224 self.target.AddTag(tag)
15225 except errors.TagError, err:
15226 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15227 self.cfg.Update(self.target, feedback_fn)
15230 class LUTagsDel(TagsLU):
15231 """Delete a list of tags from a given object.
15236 def CheckPrereq(self):
15237 """Check prerequisites.
15239 This checks that we have the given tag.
15242 TagsLU.CheckPrereq(self)
15243 for tag in self.op.tags:
15244 objects.TaggableObject.ValidateTag(tag)
15245 del_tags = frozenset(self.op.tags)
15246 cur_tags = self.target.GetTags()
15248 diff_tags = del_tags - cur_tags
15250 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15251 raise errors.OpPrereqError("Tag(s) %s not found" %
15252 (utils.CommaJoin(diff_names), ),
15253 errors.ECODE_NOENT)
15255 def Exec(self, feedback_fn):
15256 """Remove the tag from the object.
15259 for tag in self.op.tags:
15260 self.target.RemoveTag(tag)
15261 self.cfg.Update(self.target, feedback_fn)
15264 class LUTestDelay(NoHooksLU):
15265 """Sleep for a specified amount of time.
15267 This LU sleeps on the master and/or nodes for a specified amount of
15273 def ExpandNames(self):
15274 """Expand names and set required locks.
15276 This expands the node list, if any.
15279 self.needed_locks = {}
15280 if self.op.on_nodes:
15281 # _GetWantedNodes can be used here, but is not always appropriate to use
15282 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15283 # more information.
15284 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15285 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15287 def _TestDelay(self):
15288 """Do the actual sleep.
15291 if self.op.on_master:
15292 if not utils.TestDelay(self.op.duration):
15293 raise errors.OpExecError("Error during master delay test")
15294 if self.op.on_nodes:
15295 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15296 for node, node_result in result.items():
15297 node_result.Raise("Failure during rpc call to node %s" % node)
15299 def Exec(self, feedback_fn):
15300 """Execute the test delay opcode, with the wanted repetitions.
15303 if self.op.repeat == 0:
15306 top_value = self.op.repeat - 1
15307 for i in range(self.op.repeat):
15308 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
15312 class LURestrictedCommand(NoHooksLU):
15313 """Logical unit for executing restricted commands.
15318 def ExpandNames(self):
15320 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15322 self.needed_locks = {
15323 locking.LEVEL_NODE: self.op.nodes,
15325 self.share_locks = {
15326 locking.LEVEL_NODE: not self.op.use_locking,
15329 def CheckPrereq(self):
15330 """Check prerequisites.
15334 def Exec(self, feedback_fn):
15335 """Execute restricted command and return output.
15338 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15340 # Check if correct locks are held
15341 assert set(self.op.nodes).issubset(owned_nodes)
15343 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15347 for node_name in self.op.nodes:
15348 nres = rpcres[node_name]
15350 msg = ("Command '%s' on node '%s' failed: %s" %
15351 (self.op.command, node_name, nres.fail_msg))
15352 result.append((False, msg))
15354 result.append((True, nres.payload))
15359 class LUTestJqueue(NoHooksLU):
15360 """Utility LU to test some aspects of the job queue.
15365 # Must be lower than default timeout for WaitForJobChange to see whether it
15366 # notices changed jobs
15367 _CLIENT_CONNECT_TIMEOUT = 20.0
15368 _CLIENT_CONFIRM_TIMEOUT = 60.0
15371 def _NotifyUsingSocket(cls, cb, errcls):
15372 """Opens a Unix socket and waits for another program to connect.
15375 @param cb: Callback to send socket name to client
15376 @type errcls: class
15377 @param errcls: Exception class to use for errors
15380 # Using a temporary directory as there's no easy way to create temporary
15381 # sockets without writing a custom loop around tempfile.mktemp and
15383 tmpdir = tempfile.mkdtemp()
15385 tmpsock = utils.PathJoin(tmpdir, "sock")
15387 logging.debug("Creating temporary socket at %s", tmpsock)
15388 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15393 # Send details to client
15396 # Wait for client to connect before continuing
15397 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15399 (conn, _) = sock.accept()
15400 except socket.error, err:
15401 raise errcls("Client didn't connect in time (%s)" % err)
15405 # Remove as soon as client is connected
15406 shutil.rmtree(tmpdir)
15408 # Wait for client to close
15411 # pylint: disable=E1101
15412 # Instance of '_socketobject' has no ... member
15413 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15415 except socket.error, err:
15416 raise errcls("Client failed to confirm notification (%s)" % err)
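# Illustrative sketch (not part of this module): the peer receiving the socket
# path via the callback is expected to connect within _CLIENT_CONNECT_TIMEOUT
# and then close the connection to confirm the notification. A minimal client
# could look like this (hypothetical helper):
#
#   import socket
#
#   def _ConfirmNotification(sockname, timeout=10.0):
#     sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#     sock.settimeout(timeout)
#     try:
#       sock.connect(sockname)
#     finally:
#       # Closing the connection signals the waiting LU to continue
#       sock.close()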
15420 def _SendNotification(self, test, arg, sockname):
15421 """Sends a notification to the client.
15424 @param test: Test name
15425 @param arg: Test argument (depends on test)
15426 @type sockname: string
15427 @param sockname: Socket path
15430 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15432 def _Notify(self, prereq, test, arg):
15433 """Notifies the client of a test.
15436 @param prereq: Whether this is a prereq-phase test
15438 @param test: Test name
15439 @param arg: Test argument (depends on test)
15443 errcls = errors.OpPrereqError
15445 errcls = errors.OpExecError
15447 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15451 def CheckArguments(self):
15452 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15453 self.expandnames_calls = 0
15455 def ExpandNames(self):
15456 checkargs_calls = getattr(self, "checkargs_calls", 0)
15457 if checkargs_calls < 1:
15458 raise errors.ProgrammerError("CheckArguments was not called")
15460 self.expandnames_calls += 1
15462 if self.op.notify_waitlock:
15463 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15465 self.LogInfo("Expanding names")
15467 # Get lock on master node (just to get a lock, not for a particular reason)
15468 self.needed_locks = {
15469 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15472 def Exec(self, feedback_fn):
15473 if self.expandnames_calls < 1:
15474 raise errors.ProgrammerError("ExpandNames was not called")
15476 if self.op.notify_exec:
15477 self._Notify(False, constants.JQT_EXEC, None)
15479 self.LogInfo("Executing")
15481 if self.op.log_messages:
15482 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15483 for idx, msg in enumerate(self.op.log_messages):
15484 self.LogInfo("Sending log message %s", idx + 1)
15485 feedback_fn(constants.JQT_MSGPREFIX + msg)
15486 # Report how many test messages have been sent
15487 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15490 raise errors.OpExecError("Opcode failure was requested")
15495 class LUTestAllocator(NoHooksLU):
15496 """Run allocator tests.
15498 This LU runs the allocator tests
15501 def CheckPrereq(self):
15502 """Check prerequisites.
15504 This checks the opcode parameters depending on the direction and mode of the test.
15507 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15508 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15509 for attr in ["memory", "disks", "disk_template",
15510 "os", "tags", "nics", "vcpus"]:
15511 if not hasattr(self.op, attr):
15512 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15513 attr, errors.ECODE_INVAL)
15514 iname = self.cfg.ExpandInstanceName(self.op.name)
15515 if iname is not None:
15516 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15517 iname, errors.ECODE_EXISTS)
15518 if not isinstance(self.op.nics, list):
15519 raise errors.OpPrereqError("Invalid parameter 'nics'",
15520 errors.ECODE_INVAL)
15521 if not isinstance(self.op.disks, list):
15522 raise errors.OpPrereqError("Invalid parameter 'disks'",
15523 errors.ECODE_INVAL)
15524 for row in self.op.disks:
15525 if (not isinstance(row, dict) or
15526 constants.IDISK_SIZE not in row or
15527 not isinstance(row[constants.IDISK_SIZE], int) or
15528 constants.IDISK_MODE not in row or
15529 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15530 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15531 " parameter", errors.ECODE_INVAL)
15532 if self.op.hypervisor is None:
15533 self.op.hypervisor = self.cfg.GetHypervisorType()
15534 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15535 fname = _ExpandInstanceName(self.cfg, self.op.name)
15536 self.op.name = fname
15537 self.relocate_from = \
15538 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15539 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15540 constants.IALLOCATOR_MODE_NODE_EVAC):
15541 if not self.op.instances:
15542 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15543 self.op.instances = _GetWantedInstances(self, self.op.instances)
15545 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15546 self.op.mode, errors.ECODE_INVAL)
15548 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15549 if self.op.allocator is None:
15550 raise errors.OpPrereqError("Missing allocator name",
15551 errors.ECODE_INVAL)
15552 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15553 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15554 self.op.direction, errors.ECODE_INVAL)
15556 def Exec(self, feedback_fn):
15557 """Run the allocator test.
15560 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15561 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15562 memory=self.op.memory,
15563 disks=self.op.disks,
15564 disk_template=self.op.disk_template,
15568 vcpus=self.op.vcpus,
15569 spindle_use=self.op.spindle_use,
15570 hypervisor=self.op.hypervisor)
15571 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15572 req = iallocator.IAReqRelocate(name=self.op.name,
15573 relocate_from=list(self.relocate_from))
15574 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15575 req = iallocator.IAReqGroupChange(instances=self.op.instances,
15576 target_groups=self.op.target_groups)
15577 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15578 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15579 evac_mode=self.op.evac_mode)
15580 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15581 disk_template = self.op.disk_template
15582 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15583 memory=self.op.memory,
15584 disks=self.op.disks,
15585 disk_template=disk_template,
15589 vcpus=self.op.vcpus,
15590 spindle_use=self.op.spindle_use,
15591 hypervisor=self.op.hypervisor)
15592 for idx in range(self.op.count)]
15593 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15595 raise errors.ProgrammerError("Unhandled mode %s in"
15596 " LUTestAllocator.Exec", self.op.mode)
15598 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15599 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15600 result = ial.in_text
15602 ial.Run(self.op.allocator, validate=False)
15603 result = ial.out_text
15608 class LUNetworkAdd(LogicalUnit):
15609 """Logical unit for creating networks.
15612 HPATH = "network-add"
15613 HTYPE = constants.HTYPE_NETWORK
15616 def BuildHooksNodes(self):
15617 """Build hooks nodes.
15620 mn = self.cfg.GetMasterNode()
15621 return ([mn], [mn])
15623 def ExpandNames(self):
15624 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15625 self.needed_locks = {}
15626 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15628 def CheckPrereq(self):
15629 """Check prerequisites.
15631 This checks that the given network name is not already defined as a network.
15635 if self.op.network is None:
15636 raise errors.OpPrereqError("Network must be given",
15637 errors.ECODE_INVAL)
15639 uuid = self.cfg.LookupNetwork(self.op.network_name)
15642 raise errors.OpPrereqError("Network '%s' already defined" %
15643 self.op.network, errors.ECODE_EXISTS)
15645 if self.op.mac_prefix:
15646 utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15648 # Check tag validity
15649 for tag in self.op.tags:
15650 objects.TaggableObject.ValidateTag(tag)
15652 def BuildHooksEnv(self):
15653 """Build hooks env.
15657 "name": self.op.network_name,
15658 "subnet": self.op.network,
15659 "gateway": self.op.gateway,
15660 "network6": self.op.network6,
15661 "gateway6": self.op.gateway6,
15662 "mac_prefix": self.op.mac_prefix,
15663 "network_type": self.op.network_type,
15664 "tags": self.op.tags,
15666 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15668 def Exec(self, feedback_fn):
15669 """Add the ip pool to the cluster.
15672 nobj = objects.Network(name=self.op.network_name,
15673 network=self.op.network,
15674 gateway=self.op.gateway,
15675 network6=self.op.network6,
15676 gateway6=self.op.gateway6,
15677 mac_prefix=self.op.mac_prefix,
15678 network_type=self.op.network_type,
15679 uuid=self.network_uuid,
15681 # Initialize the associated address pool
15683 pool = network.AddressPool.InitializeNetwork(nobj)
15684 except errors.AddressPoolError, e:
15685 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15688 # Check if we need to reserve the nodes' IPs and the cluster master IP
15688 # These may not be allocated to any instances in routed mode, as
15689 # they wouldn't function anyway.
15690 for node in self.cfg.GetAllNodesInfo().values():
15691 for ip in [node.primary_ip, node.secondary_ip]:
15694 self.LogInfo("Reserved node %s's IP (%s)", node.name, ip)
15696 except errors.AddressPoolError:
15699 master_ip = self.cfg.GetClusterInfo().master_ip
15701 pool.Reserve(master_ip)
15702 self.LogInfo("Reserved cluster master IP (%s)", master_ip)
15703 except errors.AddressPoolError:
15706 if self.op.add_reserved_ips:
15707 for ip in self.op.add_reserved_ips:
15709 pool.Reserve(ip, external=True)
15710 except errors.AddressPoolError, e:
15711 raise errors.OpExecError("Cannot reserve IP %s: %s" % (ip, e))
15714 for tag in self.op.tags:
15717 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15718 del self.remove_locks[locking.LEVEL_NETWORK]
15721 class LUNetworkRemove(LogicalUnit):
15722 HPATH = "network-remove"
15723 HTYPE = constants.HTYPE_NETWORK
15726 def ExpandNames(self):
15727 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15729 if not self.network_uuid:
15730 raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
15731 errors.ECODE_INVAL)
15732 self.needed_locks = {
15733 locking.LEVEL_NETWORK: [self.network_uuid],
15736 def CheckPrereq(self):
15737 """Check prerequisites.
15739 This checks that the given network name exists as a network and that it
15740 is not connected to any node group.
15745 # Verify that the network is not connected.
15746 node_groups = [group.name
15747 for group in self.cfg.GetAllNodeGroupsInfo().values()
15748 for net in group.networks.keys()
15749 if net == self.network_uuid]
15752 self.LogWarning("Network '%s' is connected to the following"
15753 " node groups: %s" % (self.op.network_name,
15754 utils.CommaJoin(utils.NiceSort(node_groups))))
15755 raise errors.OpPrereqError("Network still connected",
15756 errors.ECODE_STATE)
15758 def BuildHooksEnv(self):
15759 """Build hooks env.
15763 "NETWORK_NAME": self.op.network_name,
15766 def BuildHooksNodes(self):
15767 """Build hooks nodes.
15770 mn = self.cfg.GetMasterNode()
15771 return ([mn], [mn])
15773 def Exec(self, feedback_fn):
15774 """Remove the network.
15778 self.cfg.RemoveNetwork(self.network_uuid)
15779 except errors.ConfigurationError:
15780 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15781 (self.op.network_name, self.network_uuid))
15784 class LUNetworkSetParams(LogicalUnit):
15785 """Modifies the parameters of a network.
15788 HPATH = "network-modify"
15789 HTYPE = constants.HTYPE_NETWORK
15792 def CheckArguments(self):
15793 if (self.op.gateway and
15794 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15795 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15796 " at once", errors.ECODE_INVAL)
15798 def ExpandNames(self):
15799 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15800 self.network = self.cfg.GetNetwork(self.network_uuid)
15801 if self.network is None:
15802 raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
15803 (self.op.network_name, self.network_uuid),
15804 errors.ECODE_INVAL)
15805 self.needed_locks = {
15806 locking.LEVEL_NETWORK: [self.network_uuid],
15809 def CheckPrereq(self):
15810 """Check prerequisites.
15813 self.gateway = self.network.gateway
15814 self.network_type = self.network.network_type
15815 self.mac_prefix = self.network.mac_prefix
15816 self.network6 = self.network.network6
15817 self.gateway6 = self.network.gateway6
15818 self.tags = self.network.tags
15820 self.pool = network.AddressPool(self.network)
15822 if self.op.gateway:
15823 if self.op.gateway == constants.VALUE_NONE:
15824 self.gateway = None
15826 self.gateway = self.op.gateway
15827 if self.pool.IsReserved(self.gateway):
15828 raise errors.OpPrereqError("%s is already reserved" %
15829 self.gateway, errors.ECODE_INVAL)
15831 if self.op.network_type:
15832 if self.op.network_type == constants.VALUE_NONE:
15833 self.network_type = None
15835 self.network_type = self.op.network_type
15837 if self.op.mac_prefix:
15838 if self.op.mac_prefix == constants.VALUE_NONE:
15839 self.mac_prefix = None
15841 utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15842 self.mac_prefix = self.op.mac_prefix
15844 if self.op.gateway6:
15845 if self.op.gateway6 == constants.VALUE_NONE:
15846 self.gateway6 = None
15848 self.gateway6 = self.op.gateway6
15850 if self.op.network6:
15851 if self.op.network6 == constants.VALUE_NONE:
15852 self.network6 = None
15854 self.network6 = self.op.network6
15856 def BuildHooksEnv(self):
15857 """Build hooks env.
15861 "name": self.op.network_name,
15862 "subnet": self.network.network,
15863 "gateway": self.gateway,
15864 "network6": self.network6,
15865 "gateway6": self.gateway6,
15866 "mac_prefix": self.mac_prefix,
15867 "network_type": self.network_type,
15870 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15872 def BuildHooksNodes(self):
15873 """Build hooks nodes.
15876 mn = self.cfg.GetMasterNode()
15877 return ([mn], [mn])
15879 def Exec(self, feedback_fn):
15880 """Modifies the network.
15883 #TODO: reserve/release via temporary reservation manager
15884 # extend cfg.ReserveIp/ReleaseIp with the external flag
15885 if self.op.gateway:
15886 if self.gateway == self.network.gateway:
15887 self.LogWarning("Gateway is already %s" % self.gateway)
15890 self.pool.Reserve(self.gateway, external=True)
15891 if self.network.gateway:
15892 self.pool.Release(self.network.gateway, external=True)
15893 self.network.gateway = self.gateway
15895 if self.op.add_reserved_ips:
15896 for ip in self.op.add_reserved_ips:
15898 if self.pool.IsReserved(ip):
15899 self.LogWarning("IP %s is already reserved" % ip)
15901 self.pool.Reserve(ip, external=True)
15902 except errors.AddressPoolError, e:
15903 self.LogWarning("Cannot reserve ip %s. %s" % (ip, e))
15905 if self.op.remove_reserved_ips:
15906 for ip in self.op.remove_reserved_ips:
15907 if ip == self.network.gateway:
15908 self.LogWarning("Cannot unreserve Gateway's IP")
15911 if not self.pool.IsReserved(ip):
15912 self.LogWarning("IP %s is already unreserved" % ip)
15914 self.pool.Release(ip, external=True)
15915 except errors.AddressPoolError, e:
15916 self.LogWarning("Cannot release ip %s. %s" % (ip, e))
15918 if self.op.mac_prefix:
15919 self.network.mac_prefix = self.mac_prefix
15921 if self.op.network6:
15922 self.network.network6 = self.network6
15924 if self.op.gateway6:
15925 self.network.gateway6 = self.gateway6
15927 if self.op.network_type:
15928 self.network.network_type = self.network_type
15930 self.pool.Validate()
15932 self.cfg.Update(self.network, feedback_fn)
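# Illustrative sketch (not part of this module): the reserve/release pattern
# used above, with external=True marking addresses managed outside of Ganeti.
# "net" stands for an already configured objects.Network; the address is a
# documentation-range example:
#
#   pool = network.AddressPool(net)
#   if not pool.IsReserved("192.0.2.10"):
#     pool.Reserve("192.0.2.10", external=True)
#   ...
#   if pool.IsReserved("192.0.2.10"):
#     pool.Release("192.0.2.10", external=True)
#   pool.Validate()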
15935 class _NetworkQuery(_QueryBase):
15936 FIELDS = query.NETWORK_FIELDS
15938 def ExpandNames(self, lu):
15939 lu.needed_locks = {}
15941 self._all_networks = lu.cfg.GetAllNetworksInfo()
15942 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
15945 self.wanted = [name_to_uuid[name]
15946 for name in utils.NiceSort(name_to_uuid.keys())]
15948 # Accept the passed names to be either network names or UUIDs.
15951 all_uuid = frozenset(self._all_networks.keys())
15953 for name in self.names:
15954 if name in all_uuid:
15955 self.wanted.append(name)
15956 elif name in name_to_uuid:
15957 self.wanted.append(name_to_uuid[name])
15959 missing.append(name)
15962 raise errors.OpPrereqError("Some networks do not exist: %s" % utils.CommaJoin(missing),
15963 errors.ECODE_NOENT)
15965 def DeclareLocks(self, lu, level):
15968 def _GetQueryData(self, lu):
15969 """Computes the list of networks and their attributes.
15972 do_instances = query.NETQ_INST in self.requested_data
15973 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
15974 do_stats = query.NETQ_STATS in self.requested_data
15976 network_to_groups = None
15977 network_to_instances = None
15980 # For NETQ_GROUP, we need to map network->[groups]
15982 all_groups = lu.cfg.GetAllNodeGroupsInfo()
15983 network_to_groups = dict((uuid, []) for uuid in self.wanted)
15986 all_instances = lu.cfg.GetAllInstancesInfo()
15987 all_nodes = lu.cfg.GetAllNodesInfo()
15988 network_to_instances = dict((uuid, []) for uuid in self.wanted)
15990 for group in all_groups.values():
15992 group_nodes = [node.name for node in all_nodes.values() if
15993 node.group == group.uuid]
15994 group_instances = [instance for instance in all_instances.values()
15995 if instance.primary_node in group_nodes]
15997 for net_uuid in group.networks.keys():
15998 if net_uuid in network_to_groups:
15999 netparams = group.networks[net_uuid]
16000 mode = netparams[constants.NIC_MODE]
16001 link = netparams[constants.NIC_LINK]
16002 info = group.name + '(' + mode + ', ' + link + ')'
16003 network_to_groups[net_uuid].append(info)
16006 for instance in group_instances:
16007 for nic in instance.nics:
16008 if nic.network == self._all_networks[net_uuid].name:
16009 network_to_instances[net_uuid].append(instance.name)
16014 for uuid, net in self._all_networks.items():
16015 if uuid in self.wanted:
16016 pool = network.AddressPool(net)
16018 "free_count": pool.GetFreeCount(),
16019 "reserved_count": pool.GetReservedCount(),
16020 "map": pool.GetMap(),
16021 "external_reservations": ", ".join(pool.GetExternalReservations()),
16024 return query.NetworkQueryData([self._all_networks[uuid]
16025 for uuid in self.wanted],
16027 network_to_instances,
16031 class LUNetworkQuery(NoHooksLU):
16032 """Logical unit for querying networks.
16037 def CheckArguments(self):
16038 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16039 self.op.output_fields, False)
16041 def ExpandNames(self):
16042 self.nq.ExpandNames(self)
16044 def Exec(self, feedback_fn):
16045 return self.nq.OldStyleQuery(self)
16048 class LUNetworkConnect(LogicalUnit):
16049 """Connect a network to a nodegroup
16052 HPATH = "network-connect"
16053 HTYPE = constants.HTYPE_NETWORK
16056 def ExpandNames(self):
16057 self.network_name = self.op.network_name
16058 self.group_name = self.op.group_name
16059 self.network_mode = self.op.network_mode
16060 self.network_link = self.op.network_link
16062 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16063 self.network = self.cfg.GetNetwork(self.network_uuid)
16064 if self.network is None:
16065 raise errors.OpPrereqError("Network %s does not exist" %
16066 self.network_name, errors.ECODE_INVAL)
16068 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16069 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16070 if self.group is None:
16071 raise errors.OpPrereqError("Group %s does not exist" %
16072 self.group_name, errors.ECODE_INVAL)
16074 self.needed_locks = {
16075 locking.LEVEL_INSTANCE: [],
16076 locking.LEVEL_NODEGROUP: [self.group_uuid],
16078 self.share_locks[locking.LEVEL_INSTANCE] = 1
16080 def DeclareLocks(self, level):
16081 if level == locking.LEVEL_INSTANCE:
16082 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16084 # Lock instances optimistically, needs verification once group lock has
16086 self.needed_locks[locking.LEVEL_INSTANCE] = \
16087 self.cfg.GetNodeGroupInstances(self.group_uuid)
16089 def BuildHooksEnv(self):
16091 ret["GROUP_NAME"] = self.group_name
16092 ret["GROUP_NETWORK_MODE"] = self.network_mode
16093 ret["GROUP_NETWORK_LINK"] = self.network_link
16094 ret.update(_BuildNetworkHookEnvByObject(self.network))
16097 def BuildHooksNodes(self):
16098 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16099 return (nodes, nodes)
16101 def CheckPrereq(self):
16102 l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
16105 self.netparams = dict()
16106 self.netparams[constants.NIC_MODE] = self.network_mode
16107 self.netparams[constants.NIC_LINK] = self.network_link
16108 objects.NIC.CheckParameterSyntax(self.netparams)
16110 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16111 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16112 self.connected = False
16113 if self.network_uuid in self.group.networks:
16114 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16115 (self.network_name, self.group.name))
16116 self.connected = True
16119 pool = network.AddressPool(self.network)
16120 if self.op.conflicts_check:
16121 groupinstances = []
16122 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16123 groupinstances.append(self.cfg.GetInstanceInfo(n))
16124 instances = [(instance.name, idx, nic.ip)
16125 for instance in groupinstances
16126 for idx, nic in enumerate(instance.nics)
16127 if (not nic.network and pool.Contains(nic.ip))]
16129 self.LogWarning("The following occurrences use IPs from network %s"
16130 " that is about to be connected to nodegroup %s: %s" %
16131 (self.network_name, self.group.name,
16133 raise errors.OpPrereqError("Conflicting IPs found."
16134 " Please remove/modify"
16135 " corresponding NICs",
16136 errors.ECODE_INVAL)
16138 def Exec(self, feedback_fn):
16142 self.group.networks[self.network_uuid] = self.netparams
16143 self.cfg.Update(self.group, feedback_fn)
16146 class LUNetworkDisconnect(LogicalUnit):
16147 """Disconnect a network to a nodegroup
16150 HPATH = "network-disconnect"
16151 HTYPE = constants.HTYPE_NETWORK
16154 def ExpandNames(self):
16155 self.network_name = self.op.network_name
16156 self.group_name = self.op.group_name
16158 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16159 self.network = self.cfg.GetNetwork(self.network_uuid)
16160 if self.network is None:
16161 raise errors.OpPrereqError("Network %s does not exist" %
16162 self.network_name, errors.ECODE_INVAL)
16164 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16165 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16166 if self.group is None:
16167 raise errors.OpPrereqError("Group %s does not exist" %
16168 self.group_name, errors.ECODE_INVAL)
16170 self.needed_locks = {
16171 locking.LEVEL_INSTANCE: [],
16172 locking.LEVEL_NODEGROUP: [self.group_uuid],
16174 self.share_locks[locking.LEVEL_INSTANCE] = 1
16176 def DeclareLocks(self, level):
16177 if level == locking.LEVEL_INSTANCE:
16178 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16180 # Lock instances optimistically, needs verification once group lock has
16182 self.needed_locks[locking.LEVEL_INSTANCE] = \
16183 self.cfg.GetNodeGroupInstances(self.group_uuid)
16185 def BuildHooksEnv(self):
16187 ret["GROUP_NAME"] = self.group_name
16188 ret.update(_BuildNetworkHookEnvByObject(self.network))
16191 def BuildHooksNodes(self):
16192 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16193 return (nodes, nodes)
16195 def CheckPrereq(self):
16196 l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
16199 self.connected = True
16200 if self.network_uuid not in self.group.networks:
16201 self.LogWarning("Network '%s' is"
16202 " not mapped to group '%s'" %
16203 (self.network_name, self.group.name))
16204 self.connected = False
16207 if self.op.conflicts_check:
16208 groupinstances = []
16209 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16210 groupinstances.append(self.cfg.GetInstanceInfo(n))
16211 instances = [(instance.name, idx, nic.ip)
16212 for instance in groupinstances
16213 for idx, nic in enumerate(instance.nics)
16214 if nic.network == self.network_name]
16216 self.LogWarning("The following occurrences use IPs from network %s"
16217 " that is about to be disconnected from the nodegroup"
16219 (self.network_name, self.group.name,
16221 raise errors.OpPrereqError("Conflicting IPs found."
16222 " Please remove/modify"
16223 " corresponding NICs",
16224 errors.ECODE_INVAL)
16226 def Exec(self, feedback_fn):
16227 if not self.connected:
16230 del self.group.networks[self.network_uuid]
16231 self.cfg.Update(self.group, feedback_fn)
16234 #: Query type implementations
16236 constants.QR_CLUSTER: _ClusterQuery,
16237 constants.QR_INSTANCE: _InstanceQuery,
16238 constants.QR_NODE: _NodeQuery,
16239 constants.QR_GROUP: _GroupQuery,
16240 constants.QR_NETWORK: _NetworkQuery,
16241 constants.QR_OS: _OsQuery,
16242 constants.QR_EXPORT: _ExportQuery,
16245 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16248 def _GetQueryImplementation(name):
16249 """Returns the implemtnation for a query type.
16251 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16255 return _QUERY_IMPL[name]
16257 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16258 errors.ECODE_INVAL)
16261 def _CheckForConflictingIp(lu, ip, node):
16262 """In case of conflicting ip raise error.
16265 @param ip: ip address
16267 @param node: node name
16270 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16271 if conf_net is not None:
16272 raise errors.OpPrereqError("Conflicting IP found:"
16273 " %s <> %s." % (ip, conf_net),
16274 errors.ECODE_INVAL)
16276 return (None, None)
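# Illustrative sketch (not part of this module): a hypothetical call site for
# the helper above, e.g. from a CheckPrereq that is about to assign an IP to
# a NIC on a given node; it either raises OpPrereqError or returns quietly:
#
#   _CheckForConflictingIp(self, "192.0.2.25", "node1.example.com")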