4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti.masterd import iallocator
64 import ganeti.masterd.instance # pylint: disable=W0611
68 INSTANCE_DOWN = [constants.ADMINST_DOWN]
69 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
70 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
72 #: Instance status in which an instance can be marked as offline/online
73 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
74 constants.ADMINST_OFFLINE,
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcode.OpCode}
93 @param jobs: A list of lists of opcode objects
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensure
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separate is better because:
173 - ExpandNames is left as as purely a lock-related function
174 - CheckPrereq is run after we have acquired locks (and possible
177 The function is allowed to change the self.op attribute so that
178 later methods can no longer worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that
197 level (note that in this case C{DeclareLocks} won't be called
198 at all for that level)
199 - if you need locks at a level, but you can't calculate it in
200 this function, initialise that level with an empty list and do
201 further processing in L{LogicalUnit.DeclareLocks} (see that
202 function's docstring)
203 - don't put anything for the BGL level
204 - if you want all locks at a level use L{locking.ALL_SET} as a value
206 If you need to share locks (rather than acquire them exclusively) at one
207 level you can modify self.share_locks, setting a true value (usually 1) for
208 that level. By default locks are not shared.
210 This function can also define a list of tasklets, which then will be
211 executed in order instead of the usual LU-level CheckPrereq and Exec
212 functions, if those are not defined by the LU.
216 # Acquire all nodes and one instance
217 self.needed_locks = {
218 locking.LEVEL_NODE: locking.ALL_SET,
219 locking.LEVEL_INSTANCE: ['instance1.example.com'],
221 # Acquire just two nodes
222 self.needed_locks = {
223 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
226 self.needed_locks = {} # No, you can't leave it to the default value None
229 # The implementation of this method is mandatory only if the new LU is
230 # concurrent, so that old LUs don't need to be changed all at the same
233 self.needed_locks = {} # Exclusive LUs don't need locks.
235 raise NotImplementedError
237 def DeclareLocks(self, level):
238 """Declare LU locking needs for a level
240 While most LUs can just declare their locking needs at ExpandNames time,
241 sometimes there's the need to calculate some locks after having acquired
242 the ones before. This function is called just before acquiring locks at a
243 particular level, but after acquiring the ones at lower levels, and permits
244 such calculations. It can be used to modify self.needed_locks, and by
245 default it does nothing.
247 This function is only called if you have something already set in
248 self.needed_locks for the level.
250 @param level: Locking level which is going to be locked
251 @type level: member of L{ganeti.locking.LEVELS}
255 def CheckPrereq(self):
256 """Check prerequisites for this LU.
258 This method should check that the prerequisites for the execution
259 of this LU are fulfilled. It can do internode communication, but
260 it should be idempotent - no cluster or system changes are
263 The method should raise errors.OpPrereqError in case something is
264 not fulfilled. Its return value is ignored.
266 This method should also update all the parameters of the opcode to
267 their canonical form if it hasn't been done by ExpandNames before.
270 if self.tasklets is not None:
271 for (idx, tl) in enumerate(self.tasklets):
272 logging.debug("Checking prerequisites for tasklet %s/%s",
273 idx + 1, len(self.tasklets))
278 def Exec(self, feedback_fn):
281 This method should implement the actual work. It should raise
282 errors.OpExecError for failures that are somewhat dealt with in
286 if self.tasklets is not None:
287 for (idx, tl) in enumerate(self.tasklets):
288 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
291 raise NotImplementedError
293 def BuildHooksEnv(self):
294 """Build hooks environment for this LU.
297 @return: Dictionary containing the environment that will be used for
298 running the hooks for this LU. The keys of the dict must not be prefixed
299 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
300 will extend the environment with additional variables. If no environment
301 should be defined, an empty dictionary should be returned (not C{None}).
302 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
306 raise NotImplementedError
308 def BuildHooksNodes(self):
309 """Build list of nodes to run LU's hooks.
311 @rtype: tuple; (list, list)
312 @return: Tuple containing a list of node names on which the hook
313 should run before the execution and a list of node names on which the
314 hook should run after the execution. No nodes should be returned as an
315 empty list (and not None).
316 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
320 raise NotImplementedError
322 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
323 """Notify the LU about the results of its hooks.
325 This method is called every time a hooks phase is executed, and notifies
326 the Logical Unit about the hooks' result. The LU can then use it to alter
327 its result based on the hooks. By default the method does nothing and the
328 previous result is passed back unchanged but any LU can define it if it
329 wants to use the local cluster hook-scripts somehow.
331 @param phase: one of L{constants.HOOKS_PHASE_POST} or
332 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
333 @param hook_results: the results of the multi-node hooks rpc call
334 @param feedback_fn: function used send feedback back to the caller
335 @param lu_result: the previous Exec result this LU had, or None
337 @return: the new Exec result, based on the previous result
341 # API must be kept, thus we ignore the unused argument and could
342 # be a function warnings
343 # pylint: disable=W0613,R0201
346 def _ExpandAndLockInstance(self):
347 """Helper function to expand and lock an instance.
349 Many LUs that work on an instance take its name in self.op.instance_name
350 and need to expand it and then declare the expanded name for locking. This
351 function does it, and then updates self.op.instance_name to the expanded
352 name. It also initializes needed_locks as a dict, if this hasn't been done
356 if self.needed_locks is None:
357 self.needed_locks = {}
359 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
360 "_ExpandAndLockInstance called with instance-level locks set"
361 self.op.instance_name = _ExpandInstanceName(self.cfg,
362 self.op.instance_name)
363 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
365 def _LockInstancesNodes(self, primary_only=False,
366 level=locking.LEVEL_NODE):
367 """Helper function to declare instances' nodes for locking.
369 This function should be called after locking one or more instances to lock
370 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
371 with all primary or secondary nodes for instances already locked and
372 present in self.needed_locks[locking.LEVEL_INSTANCE].
374 It should be called from DeclareLocks, and for safety only works if
375 self.recalculate_locks[locking.LEVEL_NODE] is set.
377 In the future it may grow parameters to just lock some instance's nodes, or
378 to just lock primaries or secondary nodes, if needed.
380 If should be called in DeclareLocks in a way similar to::
382 if level == locking.LEVEL_NODE:
383 self._LockInstancesNodes()
385 @type primary_only: boolean
386 @param primary_only: only lock primary nodes of locked instances
387 @param level: Which lock level to use for locking nodes
390 assert level in self.recalculate_locks, \
391 "_LockInstancesNodes helper function called with no nodes to recalculate"
393 # TODO: check if we're really been called with the instance locks held
395 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
396 # future we might want to have different behaviors depending on the value
397 # of self.recalculate_locks[locking.LEVEL_NODE]
399 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
400 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
401 wanted_nodes.append(instance.primary_node)
403 wanted_nodes.extend(instance.secondary_nodes)
405 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
406 self.needed_locks[level] = wanted_nodes
407 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
408 self.needed_locks[level].extend(wanted_nodes)
410 raise errors.ProgrammerError("Unknown recalculation mode")
412 del self.recalculate_locks[level]
415 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
416 """Simple LU which runs no hooks.
418 This LU is intended as a parent for other LogicalUnits which will
419 run no hooks, in order to reduce duplicate code.
425 def BuildHooksEnv(self):
426 """Empty BuildHooksEnv for NoHooksLu.
428 This just raises an error.
431 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
433 def BuildHooksNodes(self):
434 """Empty BuildHooksNodes for NoHooksLU.
437 raise AssertionError("BuildHooksNodes called for NoHooksLU")
441 """Tasklet base class.
443 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
444 they can mix legacy code with tasklets. Locking needs to be done in the LU,
445 tasklets know nothing about locks.
447 Subclasses must follow these rules:
448 - Implement CheckPrereq
452 def __init__(self, lu):
459 def CheckPrereq(self):
460 """Check prerequisites for this tasklets.
462 This method should check whether the prerequisites for the execution of
463 this tasklet are fulfilled. It can do internode communication, but it
464 should be idempotent - no cluster or system changes are allowed.
466 The method should raise errors.OpPrereqError in case something is not
467 fulfilled. Its return value is ignored.
469 This method should also update all parameters to their canonical form if it
470 hasn't been done before.
475 def Exec(self, feedback_fn):
476 """Execute the tasklet.
478 This method should implement the actual work. It should raise
479 errors.OpExecError for failures that are somewhat dealt with in code, or
483 raise NotImplementedError
487 """Base for query utility classes.
490 #: Attribute holding field definitions
496 def __init__(self, qfilter, fields, use_locking):
497 """Initializes this class.
500 self.use_locking = use_locking
502 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
503 namefield=self.SORT_FIELD)
504 self.requested_data = self.query.RequestedData()
505 self.names = self.query.RequestedNames()
507 # Sort only if no names were requested
508 self.sort_by_name = not self.names
510 self.do_locking = None
513 def _GetNames(self, lu, all_names, lock_level):
514 """Helper function to determine names asked for in the query.
518 names = lu.owned_locks(lock_level)
522 if self.wanted == locking.ALL_SET:
523 assert not self.names
524 # caller didn't specify names, so ordering is not important
525 return utils.NiceSort(names)
527 # caller specified names and we must keep the same order
529 assert not self.do_locking or lu.glm.is_owned(lock_level)
531 missing = set(self.wanted).difference(names)
533 raise errors.OpExecError("Some items were removed before retrieving"
534 " their data: %s" % missing)
536 # Return expanded names
539 def ExpandNames(self, lu):
540 """Expand names for this query.
542 See L{LogicalUnit.ExpandNames}.
545 raise NotImplementedError()
547 def DeclareLocks(self, lu, level):
548 """Declare locks for this query.
550 See L{LogicalUnit.DeclareLocks}.
553 raise NotImplementedError()
555 def _GetQueryData(self, lu):
556 """Collects all data for this query.
558 @return: Query data object
561 raise NotImplementedError()
563 def NewStyleQuery(self, lu):
564 """Collect data and execute query.
567 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
568 sort_by_name=self.sort_by_name)
570 def OldStyleQuery(self, lu):
571 """Collect data and execute query.
574 return self.query.OldStyleQuery(self._GetQueryData(lu),
575 sort_by_name=self.sort_by_name)
579 """Returns a dict declaring all lock levels shared.
582 return dict.fromkeys(locking.LEVELS, 1)
585 def _AnnotateDiskParams(instance, devs, cfg):
586 """Little helper wrapper to the rpc annotation method.
588 @param instance: The instance object
589 @type devs: List of L{objects.Disk}
590 @param devs: The root devices (not any of its children!)
591 @param cfg: The config object
592 @returns The annotated disk copies
593 @see L{rpc.AnnotateDiskParams}
596 return rpc.AnnotateDiskParams(instance.disk_template, devs,
597 cfg.GetInstanceDiskParams(instance))
600 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
602 """Checks if node groups for locked instances are still correct.
604 @type cfg: L{config.ConfigWriter}
605 @param cfg: Cluster configuration
606 @type instances: dict; string as key, L{objects.Instance} as value
607 @param instances: Dictionary, instance name as key, instance object as value
608 @type owned_groups: iterable of string
609 @param owned_groups: List of owned groups
610 @type owned_nodes: iterable of string
611 @param owned_nodes: List of owned nodes
612 @type cur_group_uuid: string or None
613 @param cur_group_uuid: Optional group UUID to check against instance's groups
616 for (name, inst) in instances.items():
617 assert owned_nodes.issuperset(inst.all_nodes), \
618 "Instance %s's nodes changed while we kept the lock" % name
620 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
622 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
623 "Instance %s has no node in group %s" % (name, cur_group_uuid)
626 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
628 """Checks if the owned node groups are still correct for an instance.
630 @type cfg: L{config.ConfigWriter}
631 @param cfg: The cluster configuration
632 @type instance_name: string
633 @param instance_name: Instance name
634 @type owned_groups: set or frozenset
635 @param owned_groups: List of currently owned node groups
636 @type primary_only: boolean
637 @param primary_only: Whether to check node groups for only the primary node
640 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
642 if not owned_groups.issuperset(inst_groups):
643 raise errors.OpPrereqError("Instance %s's node groups changed since"
644 " locks were acquired, current groups are"
645 " are '%s', owning groups '%s'; retry the"
648 utils.CommaJoin(inst_groups),
649 utils.CommaJoin(owned_groups)),
655 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
656 """Checks if the instances in a node group are still correct.
658 @type cfg: L{config.ConfigWriter}
659 @param cfg: The cluster configuration
660 @type group_uuid: string
661 @param group_uuid: Node group UUID
662 @type owned_instances: set or frozenset
663 @param owned_instances: List of currently owned instances
666 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
667 if owned_instances != wanted_instances:
668 raise errors.OpPrereqError("Instances in node group '%s' changed since"
669 " locks were acquired, wanted '%s', have '%s';"
670 " retry the operation" %
672 utils.CommaJoin(wanted_instances),
673 utils.CommaJoin(owned_instances)),
676 return wanted_instances
679 def _SupportsOob(cfg, node):
680 """Tells if node supports OOB.
682 @type cfg: L{config.ConfigWriter}
683 @param cfg: The cluster configuration
684 @type node: L{objects.Node}
685 @param node: The node
686 @return: The OOB script if supported or an empty string otherwise
689 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
692 def _GetWantedNodes(lu, nodes):
693 """Returns list of checked and expanded node names.
695 @type lu: L{LogicalUnit}
696 @param lu: the logical unit on whose behalf we execute
698 @param nodes: list of node names or None for all nodes
700 @return: the list of nodes, sorted
701 @raise errors.ProgrammerError: if the nodes parameter is wrong type
705 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
707 return utils.NiceSort(lu.cfg.GetNodeList())
710 def _GetWantedInstances(lu, instances):
711 """Returns list of checked and expanded instance names.
713 @type lu: L{LogicalUnit}
714 @param lu: the logical unit on whose behalf we execute
715 @type instances: list
716 @param instances: list of instance names or None for all instances
718 @return: the list of instances, sorted
719 @raise errors.OpPrereqError: if the instances parameter is wrong type
720 @raise errors.OpPrereqError: if any of the passed instances is not found
724 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
726 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
730 def _GetUpdatedParams(old_params, update_dict,
731 use_default=True, use_none=False):
732 """Return the new version of a parameter dictionary.
734 @type old_params: dict
735 @param old_params: old parameters
736 @type update_dict: dict
737 @param update_dict: dict containing new parameter values, or
738 constants.VALUE_DEFAULT to reset the parameter to its default
740 @param use_default: boolean
741 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
742 values as 'to be deleted' values
743 @param use_none: boolean
744 @type use_none: whether to recognise C{None} values as 'to be
747 @return: the new parameter dictionary
750 params_copy = copy.deepcopy(old_params)
751 for key, val in update_dict.iteritems():
752 if ((use_default and val == constants.VALUE_DEFAULT) or
753 (use_none and val is None)):
759 params_copy[key] = val
763 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
764 """Return the new version of a instance policy.
766 @param group_policy: whether this policy applies to a group and thus
767 we should support removal of policy entries
770 use_none = use_default = group_policy
771 ipolicy = copy.deepcopy(old_ipolicy)
772 for key, value in new_ipolicy.items():
773 if key not in constants.IPOLICY_ALL_KEYS:
774 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
776 if key in constants.IPOLICY_ISPECS:
777 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
778 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
780 use_default=use_default)
782 if (not value or value == [constants.VALUE_DEFAULT] or
783 value == constants.VALUE_DEFAULT):
787 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
788 " on the cluster'" % key,
791 if key in constants.IPOLICY_PARAMETERS:
792 # FIXME: we assume all such values are float
794 ipolicy[key] = float(value)
795 except (TypeError, ValueError), err:
796 raise errors.OpPrereqError("Invalid value for attribute"
797 " '%s': '%s', error: %s" %
798 (key, value, err), errors.ECODE_INVAL)
800 # FIXME: we assume all others are lists; this should be redone
802 ipolicy[key] = list(value)
804 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
805 except errors.ConfigurationError, err:
806 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
811 def _UpdateAndVerifySubDict(base, updates, type_check):
812 """Updates and verifies a dict with sub dicts of the same type.
814 @param base: The dict with the old data
815 @param updates: The dict with the new data
816 @param type_check: Dict suitable to ForceDictType to verify correct types
817 @returns: A new dict with updated and verified values
821 new = _GetUpdatedParams(old, value)
822 utils.ForceDictType(new, type_check)
825 ret = copy.deepcopy(base)
826 ret.update(dict((key, fn(base.get(key, {}), value))
827 for key, value in updates.items()))
831 def _MergeAndVerifyHvState(op_input, obj_input):
832 """Combines the hv state from an opcode with the one of the object
834 @param op_input: The input dict from the opcode
835 @param obj_input: The input dict from the objects
836 @return: The verified and updated dict
840 invalid_hvs = set(op_input) - constants.HYPER_TYPES
842 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
843 " %s" % utils.CommaJoin(invalid_hvs),
845 if obj_input is None:
847 type_check = constants.HVSTS_PARAMETER_TYPES
848 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
853 def _MergeAndVerifyDiskState(op_input, obj_input):
854 """Combines the disk state from an opcode with the one of the object
856 @param op_input: The input dict from the opcode
857 @param obj_input: The input dict from the objects
858 @return: The verified and updated dict
861 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
863 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
864 utils.CommaJoin(invalid_dst),
866 type_check = constants.DSS_PARAMETER_TYPES
867 if obj_input is None:
869 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
871 for key, value in op_input.items())
876 def _ReleaseLocks(lu, level, names=None, keep=None):
877 """Releases locks owned by an LU.
879 @type lu: L{LogicalUnit}
880 @param level: Lock level
881 @type names: list or None
882 @param names: Names of locks to release
883 @type keep: list or None
884 @param keep: Names of locks to retain
887 assert not (keep is not None and names is not None), \
888 "Only one of the 'names' and the 'keep' parameters can be given"
890 if names is not None:
891 should_release = names.__contains__
893 should_release = lambda name: name not in keep
895 should_release = None
897 owned = lu.owned_locks(level)
899 # Not owning any lock at this level, do nothing
906 # Determine which locks to release
908 if should_release(name):
913 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
915 # Release just some locks
916 lu.glm.release(level, names=release)
918 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
921 lu.glm.release(level)
923 assert not lu.glm.is_owned(level), "No locks should be owned"
926 def _MapInstanceDisksToNodes(instances):
927 """Creates a map from (node, volume) to instance name.
929 @type instances: list of L{objects.Instance}
930 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
933 return dict(((node, vol), inst.name)
934 for inst in instances
935 for (node, vols) in inst.MapLVsByNode().items()
939 def _RunPostHook(lu, node_name):
940 """Runs the post-hook for an opcode on a single node.
943 hm = lu.proc.BuildHooksManager(lu)
945 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
946 except Exception, err: # pylint: disable=W0703
947 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
950 def _CheckOutputFields(static, dynamic, selected):
951 """Checks whether all selected fields are valid.
953 @type static: L{utils.FieldSet}
954 @param static: static fields set
955 @type dynamic: L{utils.FieldSet}
956 @param dynamic: dynamic fields set
963 delta = f.NonMatching(selected)
965 raise errors.OpPrereqError("Unknown output fields selected: %s"
966 % ",".join(delta), errors.ECODE_INVAL)
969 def _CheckGlobalHvParams(params):
970 """Validates that given hypervisor params are not global ones.
972 This will ensure that instances don't get customised versions of
976 used_globals = constants.HVC_GLOBALS.intersection(params)
978 msg = ("The following hypervisor parameters are global and cannot"
979 " be customized at instance level, please modify them at"
980 " cluster level: %s" % utils.CommaJoin(used_globals))
981 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
984 def _CheckNodeOnline(lu, node, msg=None):
985 """Ensure that a given node is online.
987 @param lu: the LU on behalf of which we make the check
988 @param node: the node to check
989 @param msg: if passed, should be a message to replace the default one
990 @raise errors.OpPrereqError: if the node is offline
994 msg = "Can't use offline node"
995 if lu.cfg.GetNodeInfo(node).offline:
996 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
999 def _CheckNodeNotDrained(lu, node):
1000 """Ensure that a given node is not drained.
1002 @param lu: the LU on behalf of which we make the check
1003 @param node: the node to check
1004 @raise errors.OpPrereqError: if the node is drained
1007 if lu.cfg.GetNodeInfo(node).drained:
1008 raise errors.OpPrereqError("Can't use drained node %s" % node,
1012 def _CheckNodeVmCapable(lu, node):
1013 """Ensure that a given node is vm capable.
1015 @param lu: the LU on behalf of which we make the check
1016 @param node: the node to check
1017 @raise errors.OpPrereqError: if the node is not vm capable
1020 if not lu.cfg.GetNodeInfo(node).vm_capable:
1021 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1025 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1026 """Ensure that a node supports a given OS.
1028 @param lu: the LU on behalf of which we make the check
1029 @param node: the node to check
1030 @param os_name: the OS to query about
1031 @param force_variant: whether to ignore variant errors
1032 @raise errors.OpPrereqError: if the node is not supporting the OS
1035 result = lu.rpc.call_os_get(node, os_name)
1036 result.Raise("OS '%s' not in supported OS list for node %s" %
1038 prereq=True, ecode=errors.ECODE_INVAL)
1039 if not force_variant:
1040 _CheckOSVariant(result.payload, os_name)
1043 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1044 """Ensure that a node has the given secondary ip.
1046 @type lu: L{LogicalUnit}
1047 @param lu: the LU on behalf of which we make the check
1049 @param node: the node to check
1050 @type secondary_ip: string
1051 @param secondary_ip: the ip to check
1052 @type prereq: boolean
1053 @param prereq: whether to throw a prerequisite or an execute error
1054 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1055 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1058 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1059 result.Raise("Failure checking secondary ip on node %s" % node,
1060 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1061 if not result.payload:
1062 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1063 " please fix and re-run this command" % secondary_ip)
1065 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1067 raise errors.OpExecError(msg)
1070 def _GetClusterDomainSecret():
1071 """Reads the cluster domain secret.
1074 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1078 def _CheckInstanceState(lu, instance, req_states, msg=None):
1079 """Ensure that an instance is in one of the required states.
1081 @param lu: the LU on behalf of which we make the check
1082 @param instance: the instance to check
1083 @param msg: if passed, should be a message to replace the default one
1084 @raise errors.OpPrereqError: if the instance is not in the required state
1088 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1089 if instance.admin_state not in req_states:
1090 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1091 (instance.name, instance.admin_state, msg),
1094 if constants.ADMINST_UP not in req_states:
1095 pnode = instance.primary_node
1096 if not lu.cfg.GetNodeInfo(pnode).offline:
1097 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1098 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1099 prereq=True, ecode=errors.ECODE_ENVIRON)
1100 if instance.name in ins_l.payload:
1101 raise errors.OpPrereqError("Instance %s is running, %s" %
1102 (instance.name, msg), errors.ECODE_STATE)
1104 lu.LogWarning("Primary node offline, ignoring check that instance"
1108 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1109 """Computes if value is in the desired range.
1111 @param name: name of the parameter for which we perform the check
1112 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1114 @param ipolicy: dictionary containing min, max and std values
1115 @param value: actual value that we want to use
1116 @return: None or element not meeting the criteria
1120 if value in [None, constants.VALUE_AUTO]:
1122 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1123 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1124 if value > max_v or min_v > value:
1126 fqn = "%s/%s" % (name, qualifier)
1129 return ("%s value %s is not in range [%s, %s]" %
1130 (fqn, value, min_v, max_v))
1134 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1135 nic_count, disk_sizes, spindle_use,
1136 _compute_fn=_ComputeMinMaxSpec):
1137 """Verifies ipolicy against provided specs.
1140 @param ipolicy: The ipolicy
1142 @param mem_size: The memory size
1143 @type cpu_count: int
1144 @param cpu_count: Used cpu cores
1145 @type disk_count: int
1146 @param disk_count: Number of disks used
1147 @type nic_count: int
1148 @param nic_count: Number of nics used
1149 @type disk_sizes: list of ints
1150 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1151 @type spindle_use: int
1152 @param spindle_use: The number of spindles this instance uses
1153 @param _compute_fn: The compute function (unittest only)
1154 @return: A list of violations, or an empty list of no violations are found
1157 assert disk_count == len(disk_sizes)
1160 (constants.ISPEC_MEM_SIZE, "", mem_size),
1161 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1162 (constants.ISPEC_DISK_COUNT, "", disk_count),
1163 (constants.ISPEC_NIC_COUNT, "", nic_count),
1164 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1165 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1166 for idx, d in enumerate(disk_sizes)]
1169 (_compute_fn(name, qualifier, ipolicy, value)
1170 for (name, qualifier, value) in test_settings))
1173 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1174 _compute_fn=_ComputeIPolicySpecViolation):
1175 """Compute if instance meets the specs of ipolicy.
1178 @param ipolicy: The ipolicy to verify against
1179 @type instance: L{objects.Instance}
1180 @param instance: The instance to verify
1181 @param _compute_fn: The function to verify ipolicy (unittest only)
1182 @see: L{_ComputeIPolicySpecViolation}
1185 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1186 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1187 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1188 disk_count = len(instance.disks)
1189 disk_sizes = [disk.size for disk in instance.disks]
1190 nic_count = len(instance.nics)
1192 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1193 disk_sizes, spindle_use)
1196 def _ComputeIPolicyInstanceSpecViolation(
1197 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1198 """Compute if instance specs meets the specs of ipolicy.
1201 @param ipolicy: The ipolicy to verify against
1202 @param instance_spec: dict
1203 @param instance_spec: The instance spec to verify
1204 @param _compute_fn: The function to verify ipolicy (unittest only)
1205 @see: L{_ComputeIPolicySpecViolation}
1208 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1209 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1210 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1211 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1212 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1213 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1215 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1216 disk_sizes, spindle_use)
1219 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1221 _compute_fn=_ComputeIPolicyInstanceViolation):
1222 """Compute if instance meets the specs of the new target group.
1224 @param ipolicy: The ipolicy to verify
1225 @param instance: The instance object to verify
1226 @param current_group: The current group of the instance
1227 @param target_group: The new group of the instance
1228 @param _compute_fn: The function to verify ipolicy (unittest only)
1229 @see: L{_ComputeIPolicySpecViolation}
1232 if current_group == target_group:
1235 return _compute_fn(ipolicy, instance)
1238 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1239 _compute_fn=_ComputeIPolicyNodeViolation):
1240 """Checks that the target node is correct in terms of instance policy.
1242 @param ipolicy: The ipolicy to verify
1243 @param instance: The instance object to verify
1244 @param node: The new node to relocate
1245 @param ignore: Ignore violations of the ipolicy
1246 @param _compute_fn: The function to verify ipolicy (unittest only)
1247 @see: L{_ComputeIPolicySpecViolation}
1250 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1251 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1254 msg = ("Instance does not meet target node group's (%s) instance"
1255 " policy: %s") % (node.group, utils.CommaJoin(res))
1259 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1262 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1263 """Computes a set of any instances that would violate the new ipolicy.
1265 @param old_ipolicy: The current (still in-place) ipolicy
1266 @param new_ipolicy: The new (to become) ipolicy
1267 @param instances: List of instances to verify
1268 @return: A list of instances which violates the new ipolicy but
1272 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1273 _ComputeViolatingInstances(old_ipolicy, instances))
1276 def _ExpandItemName(fn, name, kind):
1277 """Expand an item name.
1279 @param fn: the function to use for expansion
1280 @param name: requested item name
1281 @param kind: text description ('Node' or 'Instance')
1282 @return: the resolved (full) name
1283 @raise errors.OpPrereqError: if the item is not found
1286 full_name = fn(name)
1287 if full_name is None:
1288 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1293 def _ExpandNodeName(cfg, name):
1294 """Wrapper over L{_ExpandItemName} for nodes."""
1295 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1298 def _ExpandInstanceName(cfg, name):
1299 """Wrapper over L{_ExpandItemName} for instance."""
1300 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1303 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1304 minmem, maxmem, vcpus, nics, disk_template, disks,
1305 bep, hvp, hypervisor_name, tags):
1306 """Builds instance related env variables for hooks
1308 This builds the hook environment from individual variables.
1311 @param name: the name of the instance
1312 @type primary_node: string
1313 @param primary_node: the name of the instance's primary node
1314 @type secondary_nodes: list
1315 @param secondary_nodes: list of secondary nodes as strings
1316 @type os_type: string
1317 @param os_type: the name of the instance's OS
1318 @type status: string
1319 @param status: the desired status of the instance
1320 @type minmem: string
1321 @param minmem: the minimum memory size of the instance
1322 @type maxmem: string
1323 @param maxmem: the maximum memory size of the instance
1325 @param vcpus: the count of VCPUs the instance has
1327 @param nics: list of tuples (ip, mac, mode, link) representing
1328 the NICs the instance has
1329 @type disk_template: string
1330 @param disk_template: the disk template of the instance
1332 @param disks: the list of (size, mode) pairs
1334 @param bep: the backend parameters for the instance
1336 @param hvp: the hypervisor parameters for the instance
1337 @type hypervisor_name: string
1338 @param hypervisor_name: the hypervisor for the instance
1340 @param tags: list of instance tags as strings
1342 @return: the hook environment for this instance
1347 "INSTANCE_NAME": name,
1348 "INSTANCE_PRIMARY": primary_node,
1349 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1350 "INSTANCE_OS_TYPE": os_type,
1351 "INSTANCE_STATUS": status,
1352 "INSTANCE_MINMEM": minmem,
1353 "INSTANCE_MAXMEM": maxmem,
1354 # TODO(2.7) remove deprecated "memory" value
1355 "INSTANCE_MEMORY": maxmem,
1356 "INSTANCE_VCPUS": vcpus,
1357 "INSTANCE_DISK_TEMPLATE": disk_template,
1358 "INSTANCE_HYPERVISOR": hypervisor_name,
1361 nic_count = len(nics)
1362 for idx, (ip, mac, mode, link) in enumerate(nics):
1365 env["INSTANCE_NIC%d_IP" % idx] = ip
1366 env["INSTANCE_NIC%d_MAC" % idx] = mac
1367 env["INSTANCE_NIC%d_MODE" % idx] = mode
1368 env["INSTANCE_NIC%d_LINK" % idx] = link
1369 if mode == constants.NIC_MODE_BRIDGED:
1370 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1374 env["INSTANCE_NIC_COUNT"] = nic_count
1377 disk_count = len(disks)
1378 for idx, (size, mode) in enumerate(disks):
1379 env["INSTANCE_DISK%d_SIZE" % idx] = size
1380 env["INSTANCE_DISK%d_MODE" % idx] = mode
1384 env["INSTANCE_DISK_COUNT"] = disk_count
1389 env["INSTANCE_TAGS"] = " ".join(tags)
1391 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1392 for key, value in source.items():
1393 env["INSTANCE_%s_%s" % (kind, key)] = value
1398 def _NICListToTuple(lu, nics):
1399 """Build a list of nic information tuples.
1401 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1402 value in LUInstanceQueryData.
1404 @type lu: L{LogicalUnit}
1405 @param lu: the logical unit on whose behalf we execute
1406 @type nics: list of L{objects.NIC}
1407 @param nics: list of nics to convert to hooks tuples
1411 cluster = lu.cfg.GetClusterInfo()
1415 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1416 mode = filled_params[constants.NIC_MODE]
1417 link = filled_params[constants.NIC_LINK]
1418 hooks_nics.append((ip, mac, mode, link))
1422 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1423 """Builds instance related env variables for hooks from an object.
1425 @type lu: L{LogicalUnit}
1426 @param lu: the logical unit on whose behalf we execute
1427 @type instance: L{objects.Instance}
1428 @param instance: the instance for which we should build the
1430 @type override: dict
1431 @param override: dictionary with key/values that will override
1434 @return: the hook environment dictionary
1437 cluster = lu.cfg.GetClusterInfo()
1438 bep = cluster.FillBE(instance)
1439 hvp = cluster.FillHV(instance)
1441 "name": instance.name,
1442 "primary_node": instance.primary_node,
1443 "secondary_nodes": instance.secondary_nodes,
1444 "os_type": instance.os,
1445 "status": instance.admin_state,
1446 "maxmem": bep[constants.BE_MAXMEM],
1447 "minmem": bep[constants.BE_MINMEM],
1448 "vcpus": bep[constants.BE_VCPUS],
1449 "nics": _NICListToTuple(lu, instance.nics),
1450 "disk_template": instance.disk_template,
1451 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1454 "hypervisor_name": instance.hypervisor,
1455 "tags": instance.tags,
1458 args.update(override)
1459 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1462 def _AdjustCandidatePool(lu, exceptions):
1463 """Adjust the candidate pool after node operations.
1466 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1468 lu.LogInfo("Promoted nodes to master candidate role: %s",
1469 utils.CommaJoin(node.name for node in mod_list))
1470 for name in mod_list:
1471 lu.context.ReaddNode(name)
1472 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1474 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1478 def _DecideSelfPromotion(lu, exceptions=None):
1479 """Decide whether I should promote myself as a master candidate.
1482 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1483 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1484 # the new node will increase mc_max with one, so:
1485 mc_should = min(mc_should + 1, cp_size)
1486 return mc_now < mc_should
1489 def _ComputeViolatingInstances(ipolicy, instances):
1490 """Computes a set of instances who violates given ipolicy.
1492 @param ipolicy: The ipolicy to verify
1493 @type instances: object.Instance
1494 @param instances: List of instances to verify
1495 @return: A frozenset of instance names violating the ipolicy
1498 return frozenset([inst.name for inst in instances
1499 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1502 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1503 """Check that the brigdes needed by a list of nics exist.
1506 cluster = lu.cfg.GetClusterInfo()
1507 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1508 brlist = [params[constants.NIC_LINK] for params in paramslist
1509 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1511 result = lu.rpc.call_bridges_exist(target_node, brlist)
1512 result.Raise("Error checking bridges on destination node '%s'" %
1513 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1516 def _CheckInstanceBridgesExist(lu, instance, node=None):
1517 """Check that the brigdes needed by an instance exist.
1521 node = instance.primary_node
1522 _CheckNicsBridgesExist(lu, instance.nics, node)
1525 def _CheckOSVariant(os_obj, name):
1526 """Check whether an OS name conforms to the os variants specification.
1528 @type os_obj: L{objects.OS}
1529 @param os_obj: OS object to check
1531 @param name: OS name passed by the user, to check for validity
1534 variant = objects.OS.GetVariant(name)
1535 if not os_obj.supported_variants:
1537 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1538 " passed)" % (os_obj.name, variant),
1542 raise errors.OpPrereqError("OS name must include a variant",
1545 if variant not in os_obj.supported_variants:
1546 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1549 def _GetNodeInstancesInner(cfg, fn):
1550 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1553 def _GetNodeInstances(cfg, node_name):
1554 """Returns a list of all primary and secondary instances on a node.
1558 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1561 def _GetNodePrimaryInstances(cfg, node_name):
1562 """Returns primary instances on a node.
1565 return _GetNodeInstancesInner(cfg,
1566 lambda inst: node_name == inst.primary_node)
1569 def _GetNodeSecondaryInstances(cfg, node_name):
1570 """Returns secondary instances on a node.
1573 return _GetNodeInstancesInner(cfg,
1574 lambda inst: node_name in inst.secondary_nodes)
1577 def _GetStorageTypeArgs(cfg, storage_type):
1578 """Returns the arguments for a storage type.
1581 # Special case for file storage
1582 if storage_type == constants.ST_FILE:
1583 # storage.FileStorage wants a list of storage directories
1584 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1589 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1592 for dev in instance.disks:
1593 cfg.SetDiskID(dev, node_name)
1595 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1597 result.Raise("Failed to get disk status from node %s" % node_name,
1598 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1600 for idx, bdev_status in enumerate(result.payload):
1601 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1607 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1608 """Check the sanity of iallocator and node arguments and use the
1609 cluster-wide iallocator if appropriate.
1611 Check that at most one of (iallocator, node) is specified. If none is
1612 specified, then the LU's opcode's iallocator slot is filled with the
1613 cluster-wide default iallocator.
1615 @type iallocator_slot: string
1616 @param iallocator_slot: the name of the opcode iallocator slot
1617 @type node_slot: string
1618 @param node_slot: the name of the opcode target node slot
1621 node = getattr(lu.op, node_slot, None)
1622 ialloc = getattr(lu.op, iallocator_slot, None)
1624 if node is not None and ialloc is not None:
1625 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1627 elif node is None and ialloc is None:
1628 default_iallocator = lu.cfg.GetDefaultIAllocator()
1629 if default_iallocator:
1630 setattr(lu.op, iallocator_slot, default_iallocator)
1632 raise errors.OpPrereqError("No iallocator or node given and no"
1633 " cluster-wide default iallocator found;"
1634 " please specify either an iallocator or a"
1635 " node, or set a cluster-wide default"
1636 " iallocator", errors.ECODE_INVAL)
1639 def _GetDefaultIAllocator(cfg, ialloc):
1640 """Decides on which iallocator to use.
1642 @type cfg: L{config.ConfigWriter}
1643 @param cfg: Cluster configuration object
1644 @type ialloc: string or None
1645 @param ialloc: Iallocator specified in opcode
1647 @return: Iallocator name
1651 # Use default iallocator
1652 ialloc = cfg.GetDefaultIAllocator()
1655 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1656 " opcode nor as a cluster-wide default",
1662 class LUClusterPostInit(LogicalUnit):
1663 """Logical unit for running hooks after cluster initialization.
1666 HPATH = "cluster-init"
1667 HTYPE = constants.HTYPE_CLUSTER
1669 def BuildHooksEnv(self):
1674 "OP_TARGET": self.cfg.GetClusterName(),
1677 def BuildHooksNodes(self):
1678 """Build hooks nodes.
1681 return ([], [self.cfg.GetMasterNode()])
1683 def Exec(self, feedback_fn):
1690 class LUClusterDestroy(LogicalUnit):
1691 """Logical unit for destroying the cluster.
1694 HPATH = "cluster-destroy"
1695 HTYPE = constants.HTYPE_CLUSTER
1697 def BuildHooksEnv(self):
1702 "OP_TARGET": self.cfg.GetClusterName(),
1705 def BuildHooksNodes(self):
1706 """Build hooks nodes.
1711 def CheckPrereq(self):
1712 """Check prerequisites.
1714 This checks whether the cluster is empty.
1716 Any errors are signaled by raising errors.OpPrereqError.
1719 master = self.cfg.GetMasterNode()
1721 nodelist = self.cfg.GetNodeList()
1722 if len(nodelist) != 1 or nodelist[0] != master:
1723 raise errors.OpPrereqError("There are still %d node(s) in"
1724 " this cluster." % (len(nodelist) - 1),
1726 instancelist = self.cfg.GetInstanceList()
1728 raise errors.OpPrereqError("There are still %d instance(s) in"
1729 " this cluster." % len(instancelist),
1732 def Exec(self, feedback_fn):
1733 """Destroys the cluster.
1736 master_params = self.cfg.GetMasterNetworkParameters()
1738 # Run post hooks on master node before it's removed
1739 _RunPostHook(self, master_params.name)
1741 ems = self.cfg.GetUseExternalMipScript()
1742 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1745 self.LogWarning("Error disabling the master IP address: %s",
1748 return master_params.name
1751 def _VerifyCertificate(filename):
1752 """Verifies a certificate for L{LUClusterVerifyConfig}.
1754 @type filename: string
1755 @param filename: Path to PEM file
1759 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1760 utils.ReadFile(filename))
1761 except Exception, err: # pylint: disable=W0703
1762 return (LUClusterVerifyConfig.ETYPE_ERROR,
1763 "Failed to load X509 certificate %s: %s" % (filename, err))
1766 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1767 constants.SSL_CERT_EXPIRATION_ERROR)
1770 fnamemsg = "While verifying %s: %s" % (filename, msg)
1775 return (None, fnamemsg)
1776 elif errcode == utils.CERT_WARNING:
1777 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1778 elif errcode == utils.CERT_ERROR:
1779 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1781 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1784 def _GetAllHypervisorParameters(cluster, instances):
1785 """Compute the set of all hypervisor parameters.
1787 @type cluster: L{objects.Cluster}
1788 @param cluster: the cluster object
1789 @param instances: list of L{objects.Instance}
1790 @param instances: additional instances from which to obtain parameters
1791 @rtype: list of (origin, hypervisor, parameters)
1792 @return: a list with all parameters found, indicating the hypervisor they
1793 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1798 for hv_name in cluster.enabled_hypervisors:
1799 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1801 for os_name, os_hvp in cluster.os_hvp.items():
1802 for hv_name, hv_params in os_hvp.items():
1804 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1805 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1807 # TODO: collapse identical parameter values in a single one
1808 for instance in instances:
1809 if instance.hvparams:
1810 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1811 cluster.FillHV(instance)))
1816 class _VerifyErrors(object):
1817 """Mix-in for cluster/group verify LUs.
1819 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1820 self.op and self._feedback_fn to be available.)
1824 ETYPE_FIELD = "code"
1825 ETYPE_ERROR = "ERROR"
1826 ETYPE_WARNING = "WARNING"
1828 def _Error(self, ecode, item, msg, *args, **kwargs):
1829 """Format an error message.
1831 Based on the opcode's error_codes parameter, either format a
1832 parseable error code, or a simpler error string.
1834 This must be called only from Exec and functions called from Exec.
1837 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1838 itype, etxt, _ = ecode
1839 # first complete the msg
1842 # then format the whole message
1843 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1844 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1850 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1851 # and finally report it via the feedback_fn
1852 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1854 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1855 """Log an error message if the passed condition is True.
1859 or self.op.debug_simulate_errors) # pylint: disable=E1101
1861 # If the error code is in the list of ignored errors, demote the error to a
1863 (_, etxt, _) = ecode
1864 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1865 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1868 self._Error(ecode, *args, **kwargs)
1870 # do not mark the operation as failed for WARN cases only
1871 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1872 self.bad = self.bad or cond
1875 class LUClusterVerify(NoHooksLU):
1876 """Submits all jobs necessary to verify the cluster.
1881 def ExpandNames(self):
1882 self.needed_locks = {}
1884 def Exec(self, feedback_fn):
1887 if self.op.group_name:
1888 groups = [self.op.group_name]
1889 depends_fn = lambda: None
1891 groups = self.cfg.GetNodeGroupList()
1893 # Verify global configuration
1895 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1898 # Always depend on global verification
1899 depends_fn = lambda: [(-len(jobs), [])]
1902 [opcodes.OpClusterVerifyGroup(group_name=group,
1903 ignore_errors=self.op.ignore_errors,
1904 depends=depends_fn())]
1905 for group in groups)
1907 # Fix up all parameters
1908 for op in itertools.chain(*jobs): # pylint: disable=W0142
1909 op.debug_simulate_errors = self.op.debug_simulate_errors
1910 op.verbose = self.op.verbose
1911 op.error_codes = self.op.error_codes
1913 op.skip_checks = self.op.skip_checks
1914 except AttributeError:
1915 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1917 return ResultWithJobs(jobs)
1920 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1921 """Verifies the cluster config.
1926 def _VerifyHVP(self, hvp_data):
1927 """Verifies locally the syntax of the hypervisor parameters.
1930 for item, hv_name, hv_params in hvp_data:
1931 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1934 hv_class = hypervisor.GetHypervisor(hv_name)
1935 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1936 hv_class.CheckParameterSyntax(hv_params)
1937 except errors.GenericError, err:
1938 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1940 def ExpandNames(self):
1941 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1942 self.share_locks = _ShareAll()
1944 def CheckPrereq(self):
1945 """Check prerequisites.
1948 # Retrieve all information
1949 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1950 self.all_node_info = self.cfg.GetAllNodesInfo()
1951 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1953 def Exec(self, feedback_fn):
1954 """Verify integrity of cluster, performing various test on nodes.
1958 self._feedback_fn = feedback_fn
1960 feedback_fn("* Verifying cluster config")
1962 for msg in self.cfg.VerifyConfig():
1963 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1965 feedback_fn("* Verifying cluster certificate files")
1967 for cert_filename in constants.ALL_CERT_FILES:
1968 (errcode, msg) = _VerifyCertificate(cert_filename)
1969 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1971 feedback_fn("* Verifying hypervisor parameters")
1973 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1974 self.all_inst_info.values()))
1976 feedback_fn("* Verifying all nodes belong to an existing group")
1978 # We do this verification here because, should this bogus circumstance
1979 # occur, it would never be caught by VerifyGroup, which only acts on
1980 # nodes/instances reachable from existing node groups.
1982 dangling_nodes = set(node.name for node in self.all_node_info.values()
1983 if node.group not in self.all_group_info)
1985 dangling_instances = {}
1986 no_node_instances = []
1988 for inst in self.all_inst_info.values():
1989 if inst.primary_node in dangling_nodes:
1990 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1991 elif inst.primary_node not in self.all_node_info:
1992 no_node_instances.append(inst.name)
1997 utils.CommaJoin(dangling_instances.get(node.name,
1999 for node in dangling_nodes]
2001 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2003 "the following nodes (and their instances) belong to a non"
2004 " existing group: %s", utils.CommaJoin(pretty_dangling))
2006 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2008 "the following instances have a non-existing primary-node:"
2009 " %s", utils.CommaJoin(no_node_instances))
2014 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2015 """Verifies the status of a node group.
2018 HPATH = "cluster-verify"
2019 HTYPE = constants.HTYPE_CLUSTER
2022 _HOOKS_INDENT_RE = re.compile("^", re.M)
2024 class NodeImage(object):
2025 """A class representing the logical and physical status of a node.
2028 @ivar name: the node name to which this object refers
2029 @ivar volumes: a structure as returned from
2030 L{ganeti.backend.GetVolumeList} (runtime)
2031 @ivar instances: a list of running instances (runtime)
2032 @ivar pinst: list of configured primary instances (config)
2033 @ivar sinst: list of configured secondary instances (config)
2034 @ivar sbp: dictionary of {primary-node: list of instances} for all
2035 instances for which this node is secondary (config)
2036 @ivar mfree: free memory, as reported by hypervisor (runtime)
2037 @ivar dfree: free disk, as reported by the node (runtime)
2038 @ivar offline: the offline status (config)
2039 @type rpc_fail: boolean
2040 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2041 not whether the individual keys were correct) (runtime)
2042 @type lvm_fail: boolean
2043 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2044 @type hyp_fail: boolean
2045 @ivar hyp_fail: whether the RPC call didn't return the instance list
2046 @type ghost: boolean
2047 @ivar ghost: whether this is a known node or not (config)
2048 @type os_fail: boolean
2049 @ivar os_fail: whether the RPC call didn't return valid OS data
2051 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2052 @type vm_capable: boolean
2053 @ivar vm_capable: whether the node can host instances
2056 def __init__(self, offline=False, name=None, vm_capable=True):
2065 self.offline = offline
2066 self.vm_capable = vm_capable
2067 self.rpc_fail = False
2068 self.lvm_fail = False
2069 self.hyp_fail = False
2071 self.os_fail = False
2074 def ExpandNames(self):
2075 # This raises errors.OpPrereqError on its own:
2076 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2078 # Get instances in node group; this is unsafe and needs verification later
2080 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2082 self.needed_locks = {
2083 locking.LEVEL_INSTANCE: inst_names,
2084 locking.LEVEL_NODEGROUP: [self.group_uuid],
2085 locking.LEVEL_NODE: [],
2088 self.share_locks = _ShareAll()
2090 def DeclareLocks(self, level):
2091 if level == locking.LEVEL_NODE:
2092 # Get members of node group; this is unsafe and needs verification later
2093 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2095 all_inst_info = self.cfg.GetAllInstancesInfo()
2097 # In Exec(), we warn about mirrored instances that have primary and
2098 # secondary living in separate node groups. To fully verify that
2099 # volumes for these instances are healthy, we will need to do an
2100 # extra call to their secondaries. We ensure here those nodes will
2102 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2103 # Important: access only the instances whose lock is owned
2104 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2105 nodes.update(all_inst_info[inst].secondary_nodes)
2107 self.needed_locks[locking.LEVEL_NODE] = nodes
2109 def CheckPrereq(self):
2110 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2111 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2113 group_nodes = set(self.group_info.members)
2115 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2118 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2120 unlocked_instances = \
2121 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2124 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2125 utils.CommaJoin(unlocked_nodes),
2128 if unlocked_instances:
2129 raise errors.OpPrereqError("Missing lock for instances: %s" %
2130 utils.CommaJoin(unlocked_instances),
2133 self.all_node_info = self.cfg.GetAllNodesInfo()
2134 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2136 self.my_node_names = utils.NiceSort(group_nodes)
2137 self.my_inst_names = utils.NiceSort(group_instances)
2139 self.my_node_info = dict((name, self.all_node_info[name])
2140 for name in self.my_node_names)
2142 self.my_inst_info = dict((name, self.all_inst_info[name])
2143 for name in self.my_inst_names)
2145 # We detect here the nodes that will need the extra RPC calls for verifying
2146 # split LV volumes; they should be locked.
2147 extra_lv_nodes = set()
2149 for inst in self.my_inst_info.values():
2150 if inst.disk_template in constants.DTS_INT_MIRROR:
2151 for nname in inst.all_nodes:
2152 if self.all_node_info[nname].group != self.group_uuid:
2153 extra_lv_nodes.add(nname)
2155 unlocked_lv_nodes = \
2156 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2158 if unlocked_lv_nodes:
2159 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2160 utils.CommaJoin(unlocked_lv_nodes),
2162 self.extra_lv_nodes = list(extra_lv_nodes)
2164 def _VerifyNode(self, ninfo, nresult):
2165 """Perform some basic validation on data returned from a node.
2167 - check the result data structure is well formed and has all the
2169 - check ganeti version
2171 @type ninfo: L{objects.Node}
2172 @param ninfo: the node to check
2173 @param nresult: the results from the node
2175 @return: whether overall this call was successful (and we can expect
2176 reasonable values in the respose)
2180 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2182 # main result, nresult should be a non-empty dict
2183 test = not nresult or not isinstance(nresult, dict)
2184 _ErrorIf(test, constants.CV_ENODERPC, node,
2185 "unable to verify node: no data returned")
2189 # compares ganeti version
2190 local_version = constants.PROTOCOL_VERSION
2191 remote_version = nresult.get("version", None)
2192 test = not (remote_version and
2193 isinstance(remote_version, (list, tuple)) and
2194 len(remote_version) == 2)
2195 _ErrorIf(test, constants.CV_ENODERPC, node,
2196 "connection to node returned invalid data")
2200 test = local_version != remote_version[0]
2201 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2202 "incompatible protocol versions: master %s,"
2203 " node %s", local_version, remote_version[0])
2207 # node seems compatible, we can actually try to look into its results
2209 # full package version
2210 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2211 constants.CV_ENODEVERSION, node,
2212 "software version mismatch: master %s, node %s",
2213 constants.RELEASE_VERSION, remote_version[1],
2214 code=self.ETYPE_WARNING)
2216 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2217 if ninfo.vm_capable and isinstance(hyp_result, dict):
2218 for hv_name, hv_result in hyp_result.iteritems():
2219 test = hv_result is not None
2220 _ErrorIf(test, constants.CV_ENODEHV, node,
2221 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2223 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2224 if ninfo.vm_capable and isinstance(hvp_result, list):
2225 for item, hv_name, hv_result in hvp_result:
2226 _ErrorIf(True, constants.CV_ENODEHV, node,
2227 "hypervisor %s parameter verify failure (source %s): %s",
2228 hv_name, item, hv_result)
2230 test = nresult.get(constants.NV_NODESETUP,
2231 ["Missing NODESETUP results"])
2232 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2237 def _VerifyNodeTime(self, ninfo, nresult,
2238 nvinfo_starttime, nvinfo_endtime):
2239 """Check the node time.
2241 @type ninfo: L{objects.Node}
2242 @param ninfo: the node to check
2243 @param nresult: the remote results for the node
2244 @param nvinfo_starttime: the start time of the RPC call
2245 @param nvinfo_endtime: the end time of the RPC call
2249 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2251 ntime = nresult.get(constants.NV_TIME, None)
2253 ntime_merged = utils.MergeTime(ntime)
2254 except (ValueError, TypeError):
2255 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2258 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2259 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2260 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2261 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2265 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2266 "Node time diverges by at least %s from master node time",
2269 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2270 """Check the node LVM results.
2272 @type ninfo: L{objects.Node}
2273 @param ninfo: the node to check
2274 @param nresult: the remote results for the node
2275 @param vg_name: the configured VG name
2282 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2284 # checks vg existence and size > 20G
2285 vglist = nresult.get(constants.NV_VGLIST, None)
2287 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2289 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2290 constants.MIN_VG_SIZE)
2291 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2294 pvlist = nresult.get(constants.NV_PVLIST, None)
2295 test = pvlist is None
2296 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2298 # check that ':' is not present in PV names, since it's a
2299 # special character for lvcreate (denotes the range of PEs to
2301 for _, pvname, owner_vg in pvlist:
2302 test = ":" in pvname
2303 _ErrorIf(test, constants.CV_ENODELVM, node,
2304 "Invalid character ':' in PV '%s' of VG '%s'",
2307 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2308 """Check the node bridges.
2310 @type ninfo: L{objects.Node}
2311 @param ninfo: the node to check
2312 @param nresult: the remote results for the node
2313 @param bridges: the expected list of bridges
2320 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2322 missing = nresult.get(constants.NV_BRIDGES, None)
2323 test = not isinstance(missing, list)
2324 _ErrorIf(test, constants.CV_ENODENET, node,
2325 "did not return valid bridge information")
2327 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2328 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2330 def _VerifyNodeUserScripts(self, ninfo, nresult):
2331 """Check the results of user scripts presence and executability on the node
2333 @type ninfo: L{objects.Node}
2334 @param ninfo: the node to check
2335 @param nresult: the remote results for the node
2340 test = not constants.NV_USERSCRIPTS in nresult
2341 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2342 "did not return user scripts information")
2344 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2346 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2347 "user scripts not present or not executable: %s" %
2348 utils.CommaJoin(sorted(broken_scripts)))
2350 def _VerifyNodeNetwork(self, ninfo, nresult):
2351 """Check the node network connectivity results.
2353 @type ninfo: L{objects.Node}
2354 @param ninfo: the node to check
2355 @param nresult: the remote results for the node
2359 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2361 test = constants.NV_NODELIST not in nresult
2362 _ErrorIf(test, constants.CV_ENODESSH, node,
2363 "node hasn't returned node ssh connectivity data")
2365 if nresult[constants.NV_NODELIST]:
2366 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2367 _ErrorIf(True, constants.CV_ENODESSH, node,
2368 "ssh communication with node '%s': %s", a_node, a_msg)
2370 test = constants.NV_NODENETTEST not in nresult
2371 _ErrorIf(test, constants.CV_ENODENET, node,
2372 "node hasn't returned node tcp connectivity data")
2374 if nresult[constants.NV_NODENETTEST]:
2375 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2377 _ErrorIf(True, constants.CV_ENODENET, node,
2378 "tcp communication with node '%s': %s",
2379 anode, nresult[constants.NV_NODENETTEST][anode])
2381 test = constants.NV_MASTERIP not in nresult
2382 _ErrorIf(test, constants.CV_ENODENET, node,
2383 "node hasn't returned node master IP reachability data")
2385 if not nresult[constants.NV_MASTERIP]:
2386 if node == self.master_node:
2387 msg = "the master node cannot reach the master IP (not configured?)"
2389 msg = "cannot reach the master IP"
2390 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2392 def _VerifyInstance(self, instance, instanceconfig, node_image,
2394 """Verify an instance.
2396 This function checks to see if the required block devices are
2397 available on the instance's node.
2400 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2401 node_current = instanceconfig.primary_node
2403 node_vol_should = {}
2404 instanceconfig.MapLVsByNode(node_vol_should)
2406 cluster = self.cfg.GetClusterInfo()
2407 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2409 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2410 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2412 for node in node_vol_should:
2413 n_img = node_image[node]
2414 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2415 # ignore missing volumes on offline or broken nodes
2417 for volume in node_vol_should[node]:
2418 test = volume not in n_img.volumes
2419 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2420 "volume %s missing on node %s", volume, node)
2422 if instanceconfig.admin_state == constants.ADMINST_UP:
2423 pri_img = node_image[node_current]
2424 test = instance not in pri_img.instances and not pri_img.offline
2425 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2426 "instance not running on its primary node %s",
2429 diskdata = [(nname, success, status, idx)
2430 for (nname, disks) in diskstatus.items()
2431 for idx, (success, status) in enumerate(disks)]
2433 for nname, success, bdev_status, idx in diskdata:
2434 # the 'ghost node' construction in Exec() ensures that we have a
2436 snode = node_image[nname]
2437 bad_snode = snode.ghost or snode.offline
2438 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2439 not success and not bad_snode,
2440 constants.CV_EINSTANCEFAULTYDISK, instance,
2441 "couldn't retrieve status for disk/%s on %s: %s",
2442 idx, nname, bdev_status)
2443 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2444 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2445 constants.CV_EINSTANCEFAULTYDISK, instance,
2446 "disk/%s on %s is faulty", idx, nname)
2448 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2449 """Verify if there are any unknown volumes in the cluster.
2451 The .os, .swap and backup volumes are ignored. All other volumes are
2452 reported as unknown.
2454 @type reserved: L{ganeti.utils.FieldSet}
2455 @param reserved: a FieldSet of reserved volume names
2458 for node, n_img in node_image.items():
2459 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2460 self.all_node_info[node].group != self.group_uuid):
2461 # skip non-healthy nodes
2463 for volume in n_img.volumes:
2464 test = ((node not in node_vol_should or
2465 volume not in node_vol_should[node]) and
2466 not reserved.Matches(volume))
2467 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2468 "volume %s is unknown", volume)
2470 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2471 """Verify N+1 Memory Resilience.
2473 Check that if one single node dies we can still start all the
2474 instances it was primary for.
2477 cluster_info = self.cfg.GetClusterInfo()
2478 for node, n_img in node_image.items():
2479 # This code checks that every node which is now listed as
2480 # secondary has enough memory to host all instances it is
2481 # supposed to should a single other node in the cluster fail.
2482 # FIXME: not ready for failover to an arbitrary node
2483 # FIXME: does not support file-backed instances
2484 # WARNING: we currently take into account down instances as well
2485 # as up ones, considering that even if they're down someone
2486 # might want to start them even in the event of a node failure.
2487 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2488 # we're skipping nodes marked offline and nodes in other groups from
2489 # the N+1 warning, since most likely we don't have good memory
2490 # infromation from them; we already list instances living on such
2491 # nodes, and that's enough warning
2493 #TODO(dynmem): also consider ballooning out other instances
2494 for prinode, instances in n_img.sbp.items():
2496 for instance in instances:
2497 bep = cluster_info.FillBE(instance_cfg[instance])
2498 if bep[constants.BE_AUTO_BALANCE]:
2499 needed_mem += bep[constants.BE_MINMEM]
2500 test = n_img.mfree < needed_mem
2501 self._ErrorIf(test, constants.CV_ENODEN1, node,
2502 "not enough memory to accomodate instance failovers"
2503 " should node %s fail (%dMiB needed, %dMiB available)",
2504 prinode, needed_mem, n_img.mfree)
2507 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2508 (files_all, files_opt, files_mc, files_vm)):
2509 """Verifies file checksums collected from all nodes.
2511 @param errorif: Callback for reporting errors
2512 @param nodeinfo: List of L{objects.Node} objects
2513 @param master_node: Name of master node
2514 @param all_nvinfo: RPC results
2517 # Define functions determining which nodes to consider for a file
2520 (files_mc, lambda node: (node.master_candidate or
2521 node.name == master_node)),
2522 (files_vm, lambda node: node.vm_capable),
2525 # Build mapping from filename to list of nodes which should have the file
2527 for (files, fn) in files2nodefn:
2529 filenodes = nodeinfo
2531 filenodes = filter(fn, nodeinfo)
2532 nodefiles.update((filename,
2533 frozenset(map(operator.attrgetter("name"), filenodes)))
2534 for filename in files)
2536 assert set(nodefiles) == (files_all | files_mc | files_vm)
2538 fileinfo = dict((filename, {}) for filename in nodefiles)
2539 ignore_nodes = set()
2541 for node in nodeinfo:
2543 ignore_nodes.add(node.name)
2546 nresult = all_nvinfo[node.name]
2548 if nresult.fail_msg or not nresult.payload:
2551 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2553 test = not (node_files and isinstance(node_files, dict))
2554 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2555 "Node did not return file checksum data")
2557 ignore_nodes.add(node.name)
2560 # Build per-checksum mapping from filename to nodes having it
2561 for (filename, checksum) in node_files.items():
2562 assert filename in nodefiles
2563 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2565 for (filename, checksums) in fileinfo.items():
2566 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2568 # Nodes having the file
2569 with_file = frozenset(node_name
2570 for nodes in fileinfo[filename].values()
2571 for node_name in nodes) - ignore_nodes
2573 expected_nodes = nodefiles[filename] - ignore_nodes
2575 # Nodes missing file
2576 missing_file = expected_nodes - with_file
2578 if filename in files_opt:
2580 errorif(missing_file and missing_file != expected_nodes,
2581 constants.CV_ECLUSTERFILECHECK, None,
2582 "File %s is optional, but it must exist on all or no"
2583 " nodes (not found on %s)",
2584 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2586 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2587 "File %s is missing from node(s) %s", filename,
2588 utils.CommaJoin(utils.NiceSort(missing_file)))
2590 # Warn if a node has a file it shouldn't
2591 unexpected = with_file - expected_nodes
2593 constants.CV_ECLUSTERFILECHECK, None,
2594 "File %s should not exist on node(s) %s",
2595 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2597 # See if there are multiple versions of the file
2598 test = len(checksums) > 1
2600 variants = ["variant %s on %s" %
2601 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2602 for (idx, (checksum, nodes)) in
2603 enumerate(sorted(checksums.items()))]
2607 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2608 "File %s found with %s different checksums (%s)",
2609 filename, len(checksums), "; ".join(variants))
2611 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2613 """Verifies and the node DRBD status.
2615 @type ninfo: L{objects.Node}
2616 @param ninfo: the node to check
2617 @param nresult: the remote results for the node
2618 @param instanceinfo: the dict of instances
2619 @param drbd_helper: the configured DRBD usermode helper
2620 @param drbd_map: the DRBD map as returned by
2621 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2625 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2628 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2629 test = (helper_result is None)
2630 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2631 "no drbd usermode helper returned")
2633 status, payload = helper_result
2635 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2636 "drbd usermode helper check unsuccessful: %s", payload)
2637 test = status and (payload != drbd_helper)
2638 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2639 "wrong drbd usermode helper: %s", payload)
2641 # compute the DRBD minors
2643 for minor, instance in drbd_map[node].items():
2644 test = instance not in instanceinfo
2645 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2646 "ghost instance '%s' in temporary DRBD map", instance)
2647 # ghost instance should not be running, but otherwise we
2648 # don't give double warnings (both ghost instance and
2649 # unallocated minor in use)
2651 node_drbd[minor] = (instance, False)
2653 instance = instanceinfo[instance]
2654 node_drbd[minor] = (instance.name,
2655 instance.admin_state == constants.ADMINST_UP)
2657 # and now check them
2658 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2659 test = not isinstance(used_minors, (tuple, list))
2660 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2661 "cannot parse drbd status file: %s", str(used_minors))
2663 # we cannot check drbd status
2666 for minor, (iname, must_exist) in node_drbd.items():
2667 test = minor not in used_minors and must_exist
2668 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2669 "drbd minor %d of instance %s is not active", minor, iname)
2670 for minor in used_minors:
2671 test = minor not in node_drbd
2672 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2673 "unallocated drbd minor %d is in use", minor)
2675 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2676 """Builds the node OS structures.
2678 @type ninfo: L{objects.Node}
2679 @param ninfo: the node to check
2680 @param nresult: the remote results for the node
2681 @param nimg: the node image object
2685 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2687 remote_os = nresult.get(constants.NV_OSLIST, None)
2688 test = (not isinstance(remote_os, list) or
2689 not compat.all(isinstance(v, list) and len(v) == 7
2690 for v in remote_os))
2692 _ErrorIf(test, constants.CV_ENODEOS, node,
2693 "node hasn't returned valid OS data")
2702 for (name, os_path, status, diagnose,
2703 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2705 if name not in os_dict:
2708 # parameters is a list of lists instead of list of tuples due to
2709 # JSON lacking a real tuple type, fix it:
2710 parameters = [tuple(v) for v in parameters]
2711 os_dict[name].append((os_path, status, diagnose,
2712 set(variants), set(parameters), set(api_ver)))
2714 nimg.oslist = os_dict
2716 def _VerifyNodeOS(self, ninfo, nimg, base):
2717 """Verifies the node OS list.
2719 @type ninfo: L{objects.Node}
2720 @param ninfo: the node to check
2721 @param nimg: the node image object
2722 @param base: the 'template' node we match against (e.g. from the master)
2726 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2728 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2730 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2731 for os_name, os_data in nimg.oslist.items():
2732 assert os_data, "Empty OS status for OS %s?!" % os_name
2733 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2734 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2735 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2736 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2737 "OS '%s' has multiple entries (first one shadows the rest): %s",
2738 os_name, utils.CommaJoin([v[0] for v in os_data]))
2739 # comparisons with the 'base' image
2740 test = os_name not in base.oslist
2741 _ErrorIf(test, constants.CV_ENODEOS, node,
2742 "Extra OS %s not present on reference node (%s)",
2746 assert base.oslist[os_name], "Base node has empty OS status?"
2747 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2749 # base OS is invalid, skipping
2751 for kind, a, b in [("API version", f_api, b_api),
2752 ("variants list", f_var, b_var),
2753 ("parameters", beautify_params(f_param),
2754 beautify_params(b_param))]:
2755 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2756 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2757 kind, os_name, base.name,
2758 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2760 # check any missing OSes
2761 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2762 _ErrorIf(missing, constants.CV_ENODEOS, node,
2763 "OSes present on reference node %s but missing on this node: %s",
2764 base.name, utils.CommaJoin(missing))
2766 def _VerifyOob(self, ninfo, nresult):
2767 """Verifies out of band functionality of a node.
2769 @type ninfo: L{objects.Node}
2770 @param ninfo: the node to check
2771 @param nresult: the remote results for the node
2775 # We just have to verify the paths on master and/or master candidates
2776 # as the oob helper is invoked on the master
2777 if ((ninfo.master_candidate or ninfo.master_capable) and
2778 constants.NV_OOB_PATHS in nresult):
2779 for path_result in nresult[constants.NV_OOB_PATHS]:
2780 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2782 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2783 """Verifies and updates the node volume data.
2785 This function will update a L{NodeImage}'s internal structures
2786 with data from the remote call.
2788 @type ninfo: L{objects.Node}
2789 @param ninfo: the node to check
2790 @param nresult: the remote results for the node
2791 @param nimg: the node image object
2792 @param vg_name: the configured VG name
2796 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2798 nimg.lvm_fail = True
2799 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2802 elif isinstance(lvdata, basestring):
2803 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2804 utils.SafeEncode(lvdata))
2805 elif not isinstance(lvdata, dict):
2806 _ErrorIf(True, constants.CV_ENODELVM, node,
2807 "rpc call to node failed (lvlist)")
2809 nimg.volumes = lvdata
2810 nimg.lvm_fail = False
2812 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2813 """Verifies and updates the node instance list.
2815 If the listing was successful, then updates this node's instance
2816 list. Otherwise, it marks the RPC call as failed for the instance
2819 @type ninfo: L{objects.Node}
2820 @param ninfo: the node to check
2821 @param nresult: the remote results for the node
2822 @param nimg: the node image object
2825 idata = nresult.get(constants.NV_INSTANCELIST, None)
2826 test = not isinstance(idata, list)
2827 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2828 "rpc call to node failed (instancelist): %s",
2829 utils.SafeEncode(str(idata)))
2831 nimg.hyp_fail = True
2833 nimg.instances = idata
2835 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2836 """Verifies and computes a node information map
2838 @type ninfo: L{objects.Node}
2839 @param ninfo: the node to check
2840 @param nresult: the remote results for the node
2841 @param nimg: the node image object
2842 @param vg_name: the configured VG name
2846 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2848 # try to read free memory (from the hypervisor)
2849 hv_info = nresult.get(constants.NV_HVINFO, None)
2850 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2851 _ErrorIf(test, constants.CV_ENODEHV, node,
2852 "rpc call to node failed (hvinfo)")
2855 nimg.mfree = int(hv_info["memory_free"])
2856 except (ValueError, TypeError):
2857 _ErrorIf(True, constants.CV_ENODERPC, node,
2858 "node returned invalid nodeinfo, check hypervisor")
2860 # FIXME: devise a free space model for file based instances as well
2861 if vg_name is not None:
2862 test = (constants.NV_VGLIST not in nresult or
2863 vg_name not in nresult[constants.NV_VGLIST])
2864 _ErrorIf(test, constants.CV_ENODELVM, node,
2865 "node didn't return data for the volume group '%s'"
2866 " - it is either missing or broken", vg_name)
2869 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2870 except (ValueError, TypeError):
2871 _ErrorIf(True, constants.CV_ENODERPC, node,
2872 "node returned invalid LVM info, check LVM status")
2874 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2875 """Gets per-disk status information for all instances.
2877 @type nodelist: list of strings
2878 @param nodelist: Node names
2879 @type node_image: dict of (name, L{objects.Node})
2880 @param node_image: Node objects
2881 @type instanceinfo: dict of (name, L{objects.Instance})
2882 @param instanceinfo: Instance objects
2883 @rtype: {instance: {node: [(succes, payload)]}}
2884 @return: a dictionary of per-instance dictionaries with nodes as
2885 keys and disk information as values; the disk information is a
2886 list of tuples (success, payload)
2889 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2892 node_disks_devonly = {}
2893 diskless_instances = set()
2894 diskless = constants.DT_DISKLESS
2896 for nname in nodelist:
2897 node_instances = list(itertools.chain(node_image[nname].pinst,
2898 node_image[nname].sinst))
2899 diskless_instances.update(inst for inst in node_instances
2900 if instanceinfo[inst].disk_template == diskless)
2901 disks = [(inst, disk)
2902 for inst in node_instances
2903 for disk in instanceinfo[inst].disks]
2906 # No need to collect data
2909 node_disks[nname] = disks
2911 # _AnnotateDiskParams makes already copies of the disks
2913 for (inst, dev) in disks:
2914 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2915 self.cfg.SetDiskID(anno_disk, nname)
2916 devonly.append(anno_disk)
2918 node_disks_devonly[nname] = devonly
2920 assert len(node_disks) == len(node_disks_devonly)
2922 # Collect data from all nodes with disks
2923 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2926 assert len(result) == len(node_disks)
2930 for (nname, nres) in result.items():
2931 disks = node_disks[nname]
2934 # No data from this node
2935 data = len(disks) * [(False, "node offline")]
2938 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2939 "while getting disk information: %s", msg)
2941 # No data from this node
2942 data = len(disks) * [(False, msg)]
2945 for idx, i in enumerate(nres.payload):
2946 if isinstance(i, (tuple, list)) and len(i) == 2:
2949 logging.warning("Invalid result from node %s, entry %d: %s",
2951 data.append((False, "Invalid result from the remote node"))
2953 for ((inst, _), status) in zip(disks, data):
2954 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2956 # Add empty entries for diskless instances.
2957 for inst in diskless_instances:
2958 assert inst not in instdisk
2961 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2962 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2963 compat.all(isinstance(s, (tuple, list)) and
2964 len(s) == 2 for s in statuses)
2965 for inst, nnames in instdisk.items()
2966 for nname, statuses in nnames.items())
2967 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2972 def _SshNodeSelector(group_uuid, all_nodes):
2973 """Create endless iterators for all potential SSH check hosts.
2976 nodes = [node for node in all_nodes
2977 if (node.group != group_uuid and
2979 keyfunc = operator.attrgetter("group")
2981 return map(itertools.cycle,
2982 [sorted(map(operator.attrgetter("name"), names))
2983 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2987 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2988 """Choose which nodes should talk to which other nodes.
2990 We will make nodes contact all nodes in their group, and one node from
2993 @warning: This algorithm has a known issue if one node group is much
2994 smaller than others (e.g. just one node). In such a case all other
2995 nodes will talk to the single node.
2998 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2999 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3001 return (online_nodes,
3002 dict((name, sorted([i.next() for i in sel]))
3003 for name in online_nodes))
3005 def BuildHooksEnv(self):
3008 Cluster-Verify hooks just ran in the post phase and their failure makes
3009 the output be logged in the verify output and the verification to fail.
3013 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3016 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3017 for node in self.my_node_info.values())
3021 def BuildHooksNodes(self):
3022 """Build hooks nodes.
3025 return ([], self.my_node_names)
3027 def Exec(self, feedback_fn):
3028 """Verify integrity of the node group, performing various test on nodes.
3031 # This method has too many local variables. pylint: disable=R0914
3032 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3034 if not self.my_node_names:
3036 feedback_fn("* Empty node group, skipping verification")
3040 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3041 verbose = self.op.verbose
3042 self._feedback_fn = feedback_fn
3044 vg_name = self.cfg.GetVGName()
3045 drbd_helper = self.cfg.GetDRBDHelper()
3046 cluster = self.cfg.GetClusterInfo()
3047 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3048 hypervisors = cluster.enabled_hypervisors
3049 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3051 i_non_redundant = [] # Non redundant instances
3052 i_non_a_balanced = [] # Non auto-balanced instances
3053 i_offline = 0 # Count of offline instances
3054 n_offline = 0 # Count of offline nodes
3055 n_drained = 0 # Count of nodes being drained
3056 node_vol_should = {}
3058 # FIXME: verify OS list
3061 filemap = _ComputeAncillaryFiles(cluster, False)
3063 # do local checksums
3064 master_node = self.master_node = self.cfg.GetMasterNode()
3065 master_ip = self.cfg.GetMasterIP()
3067 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3070 if self.cfg.GetUseExternalMipScript():
3071 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3073 node_verify_param = {
3074 constants.NV_FILELIST:
3075 utils.UniqueSequence(filename
3076 for files in filemap
3077 for filename in files),
3078 constants.NV_NODELIST:
3079 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3080 self.all_node_info.values()),
3081 constants.NV_HYPERVISOR: hypervisors,
3082 constants.NV_HVPARAMS:
3083 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3084 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3085 for node in node_data_list
3086 if not node.offline],
3087 constants.NV_INSTANCELIST: hypervisors,
3088 constants.NV_VERSION: None,
3089 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3090 constants.NV_NODESETUP: None,
3091 constants.NV_TIME: None,
3092 constants.NV_MASTERIP: (master_node, master_ip),
3093 constants.NV_OSLIST: None,
3094 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3095 constants.NV_USERSCRIPTS: user_scripts,
3098 if vg_name is not None:
3099 node_verify_param[constants.NV_VGLIST] = None
3100 node_verify_param[constants.NV_LVLIST] = vg_name
3101 node_verify_param[constants.NV_PVLIST] = [vg_name]
3102 node_verify_param[constants.NV_DRBDLIST] = None
3105 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3108 # FIXME: this needs to be changed per node-group, not cluster-wide
3110 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3111 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3112 bridges.add(default_nicpp[constants.NIC_LINK])
3113 for instance in self.my_inst_info.values():
3114 for nic in instance.nics:
3115 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3116 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3117 bridges.add(full_nic[constants.NIC_LINK])
3120 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3122 # Build our expected cluster state
3123 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3125 vm_capable=node.vm_capable))
3126 for node in node_data_list)
3130 for node in self.all_node_info.values():
3131 path = _SupportsOob(self.cfg, node)
3132 if path and path not in oob_paths:
3133 oob_paths.append(path)
3136 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3138 for instance in self.my_inst_names:
3139 inst_config = self.my_inst_info[instance]
3140 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3143 for nname in inst_config.all_nodes:
3144 if nname not in node_image:
3145 gnode = self.NodeImage(name=nname)
3146 gnode.ghost = (nname not in self.all_node_info)
3147 node_image[nname] = gnode
3149 inst_config.MapLVsByNode(node_vol_should)
3151 pnode = inst_config.primary_node
3152 node_image[pnode].pinst.append(instance)
3154 for snode in inst_config.secondary_nodes:
3155 nimg = node_image[snode]
3156 nimg.sinst.append(instance)
3157 if pnode not in nimg.sbp:
3158 nimg.sbp[pnode] = []
3159 nimg.sbp[pnode].append(instance)
3161 # At this point, we have the in-memory data structures complete,
3162 # except for the runtime information, which we'll gather next
3164 # Due to the way our RPC system works, exact response times cannot be
3165 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3166 # time before and after executing the request, we can at least have a time
3168 nvinfo_starttime = time.time()
3169 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3171 self.cfg.GetClusterName())
3172 nvinfo_endtime = time.time()
3174 if self.extra_lv_nodes and vg_name is not None:
3176 self.rpc.call_node_verify(self.extra_lv_nodes,
3177 {constants.NV_LVLIST: vg_name},
3178 self.cfg.GetClusterName())
3180 extra_lv_nvinfo = {}
3182 all_drbd_map = self.cfg.ComputeDRBDMap()
3184 feedback_fn("* Gathering disk information (%s nodes)" %
3185 len(self.my_node_names))
3186 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3189 feedback_fn("* Verifying configuration file consistency")
3191 # If not all nodes are being checked, we need to make sure the master node
3192 # and a non-checked vm_capable node are in the list.
3193 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3195 vf_nvinfo = all_nvinfo.copy()
3196 vf_node_info = list(self.my_node_info.values())
3197 additional_nodes = []
3198 if master_node not in self.my_node_info:
3199 additional_nodes.append(master_node)
3200 vf_node_info.append(self.all_node_info[master_node])
3201 # Add the first vm_capable node we find which is not included,
3202 # excluding the master node (which we already have)
3203 for node in absent_nodes:
3204 nodeinfo = self.all_node_info[node]
3205 if (nodeinfo.vm_capable and not nodeinfo.offline and
3206 node != master_node):
3207 additional_nodes.append(node)
3208 vf_node_info.append(self.all_node_info[node])
3210 key = constants.NV_FILELIST
3211 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3212 {key: node_verify_param[key]},
3213 self.cfg.GetClusterName()))
3215 vf_nvinfo = all_nvinfo
3216 vf_node_info = self.my_node_info.values()
3218 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3220 feedback_fn("* Verifying node status")
3224 for node_i in node_data_list:
3226 nimg = node_image[node]
3230 feedback_fn("* Skipping offline node %s" % (node,))
3234 if node == master_node:
3236 elif node_i.master_candidate:
3237 ntype = "master candidate"
3238 elif node_i.drained:
3244 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3246 msg = all_nvinfo[node].fail_msg
3247 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3250 nimg.rpc_fail = True
3253 nresult = all_nvinfo[node].payload
3255 nimg.call_ok = self._VerifyNode(node_i, nresult)
3256 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3257 self._VerifyNodeNetwork(node_i, nresult)
3258 self._VerifyNodeUserScripts(node_i, nresult)
3259 self._VerifyOob(node_i, nresult)
3262 self._VerifyNodeLVM(node_i, nresult, vg_name)
3263 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3266 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3267 self._UpdateNodeInstances(node_i, nresult, nimg)
3268 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3269 self._UpdateNodeOS(node_i, nresult, nimg)
3271 if not nimg.os_fail:
3272 if refos_img is None:
3274 self._VerifyNodeOS(node_i, nimg, refos_img)
3275 self._VerifyNodeBridges(node_i, nresult, bridges)
3277 # Check whether all running instancies are primary for the node. (This
3278 # can no longer be done from _VerifyInstance below, since some of the
3279 # wrong instances could be from other node groups.)
3280 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3282 for inst in non_primary_inst:
3283 test = inst in self.all_inst_info
3284 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3285 "instance should not run on node %s", node_i.name)
3286 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3287 "node is running unknown instance %s", inst)
3289 for node, result in extra_lv_nvinfo.items():
3290 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3291 node_image[node], vg_name)
3293 feedback_fn("* Verifying instance status")
3294 for instance in self.my_inst_names:
3296 feedback_fn("* Verifying instance %s" % instance)
3297 inst_config = self.my_inst_info[instance]
3298 self._VerifyInstance(instance, inst_config, node_image,
3300 inst_nodes_offline = []
3302 pnode = inst_config.primary_node
3303 pnode_img = node_image[pnode]
3304 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3305 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3306 " primary node failed", instance)
3308 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3310 constants.CV_EINSTANCEBADNODE, instance,
3311 "instance is marked as running and lives on offline node %s",
3312 inst_config.primary_node)
3314 # If the instance is non-redundant we cannot survive losing its primary
3315 # node, so we are not N+1 compliant. On the other hand we have no disk
3316 # templates with more than one secondary so that situation is not well
3318 # FIXME: does not support file-backed instances
3319 if not inst_config.secondary_nodes:
3320 i_non_redundant.append(instance)
3322 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3323 constants.CV_EINSTANCELAYOUT,
3324 instance, "instance has multiple secondary nodes: %s",
3325 utils.CommaJoin(inst_config.secondary_nodes),
3326 code=self.ETYPE_WARNING)
3328 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3329 pnode = inst_config.primary_node
3330 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3331 instance_groups = {}
3333 for node in instance_nodes:
3334 instance_groups.setdefault(self.all_node_info[node].group,
3338 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3339 # Sort so that we always list the primary node first.
3340 for group, nodes in sorted(instance_groups.items(),
3341 key=lambda (_, nodes): pnode in nodes,
3344 self._ErrorIf(len(instance_groups) > 1,
3345 constants.CV_EINSTANCESPLITGROUPS,
3346 instance, "instance has primary and secondary nodes in"
3347 " different groups: %s", utils.CommaJoin(pretty_list),
3348 code=self.ETYPE_WARNING)
3350 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3351 i_non_a_balanced.append(instance)
3353 for snode in inst_config.secondary_nodes:
3354 s_img = node_image[snode]
3355 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3356 snode, "instance %s, connection to secondary node failed",
3360 inst_nodes_offline.append(snode)
3362 # warn that the instance lives on offline nodes
3363 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3364 "instance has offline secondary node(s) %s",
3365 utils.CommaJoin(inst_nodes_offline))
3366 # ... or ghost/non-vm_capable nodes
3367 for node in inst_config.all_nodes:
3368 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3369 instance, "instance lives on ghost node %s", node)
3370 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3371 instance, "instance lives on non-vm_capable node %s", node)
3373 feedback_fn("* Verifying orphan volumes")
3374 reserved = utils.FieldSet(*cluster.reserved_lvs)
3376 # We will get spurious "unknown volume" warnings if any node of this group
3377 # is secondary for an instance whose primary is in another group. To avoid
3378 # them, we find these instances and add their volumes to node_vol_should.
3379 for inst in self.all_inst_info.values():
3380 for secondary in inst.secondary_nodes:
3381 if (secondary in self.my_node_info
3382 and inst.name not in self.my_inst_info):
3383 inst.MapLVsByNode(node_vol_should)
3386 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3388 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3389 feedback_fn("* Verifying N+1 Memory redundancy")
3390 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3392 feedback_fn("* Other Notes")
3394 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3395 % len(i_non_redundant))
3397 if i_non_a_balanced:
3398 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3399 % len(i_non_a_balanced))
3402 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3405 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3408 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3412 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3413 """Analyze the post-hooks' result
3415 This method analyses the hook result, handles it, and sends some
3416 nicely-formatted feedback back to the user.
3418 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3419 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3420 @param hooks_results: the results of the multi-node hooks rpc call
3421 @param feedback_fn: function used send feedback back to the caller
3422 @param lu_result: previous Exec result
3423 @return: the new Exec result, based on the previous result
3427 # We only really run POST phase hooks, only for non-empty groups,
3428 # and are only interested in their results
3429 if not self.my_node_names:
3432 elif phase == constants.HOOKS_PHASE_POST:
3433 # Used to change hooks' output to proper indentation
3434 feedback_fn("* Hooks Results")
3435 assert hooks_results, "invalid result from hooks"
3437 for node_name in hooks_results:
3438 res = hooks_results[node_name]
3440 test = msg and not res.offline
3441 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3442 "Communication failure in hooks execution: %s", msg)
3443 if res.offline or msg:
3444 # No need to investigate payload if node is offline or gave
3447 for script, hkr, output in res.payload:
3448 test = hkr == constants.HKR_FAIL
3449 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3450 "Script %s failed, output:", script)
3452 output = self._HOOKS_INDENT_RE.sub(" ", output)
3453 feedback_fn("%s" % output)
3459 class LUClusterVerifyDisks(NoHooksLU):
3460 """Verifies the cluster disks status.
3465 def ExpandNames(self):
3466 self.share_locks = _ShareAll()
3467 self.needed_locks = {
3468 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3471 def Exec(self, feedback_fn):
3472 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3474 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3475 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3476 for group in group_names])
3479 class LUGroupVerifyDisks(NoHooksLU):
3480 """Verifies the status of all disks in a node group.
3485 def ExpandNames(self):
3486 # Raises errors.OpPrereqError on its own if group can't be found
3487 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3489 self.share_locks = _ShareAll()
3490 self.needed_locks = {
3491 locking.LEVEL_INSTANCE: [],
3492 locking.LEVEL_NODEGROUP: [],
3493 locking.LEVEL_NODE: [],
3496 def DeclareLocks(self, level):
3497 if level == locking.LEVEL_INSTANCE:
3498 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3500 # Lock instances optimistically, needs verification once node and group
3501 # locks have been acquired
3502 self.needed_locks[locking.LEVEL_INSTANCE] = \
3503 self.cfg.GetNodeGroupInstances(self.group_uuid)
3505 elif level == locking.LEVEL_NODEGROUP:
3506 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3508 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3509 set([self.group_uuid] +
3510 # Lock all groups used by instances optimistically; this requires
3511 # going via the node before it's locked, requiring verification
3514 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3515 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3517 elif level == locking.LEVEL_NODE:
3518 # This will only lock the nodes in the group to be verified which contain
3520 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3521 self._LockInstancesNodes()
3523 # Lock all nodes in group to be verified
3524 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3525 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3526 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3528 def CheckPrereq(self):
3529 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3530 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3531 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3533 assert self.group_uuid in owned_groups
3535 # Check if locked instances are still correct
3536 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3538 # Get instance information
3539 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3541 # Check if node groups for locked instances are still correct
3542 _CheckInstancesNodeGroups(self.cfg, self.instances,
3543 owned_groups, owned_nodes, self.group_uuid)
3545 def Exec(self, feedback_fn):
3546 """Verify integrity of cluster disks.
3548 @rtype: tuple of three items
3549 @return: a tuple of (dict of node-to-node_error, list of instances
3550 which need activate-disks, dict of instance: (node, volume) for
3555 res_instances = set()
3558 nv_dict = _MapInstanceDisksToNodes(
3559 [inst for inst in self.instances.values()
3560 if inst.admin_state == constants.ADMINST_UP])
3563 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3564 set(self.cfg.GetVmCapableNodeList()))
3566 node_lvs = self.rpc.call_lv_list(nodes, [])
3568 for (node, node_res) in node_lvs.items():
3569 if node_res.offline:
3572 msg = node_res.fail_msg
3574 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3575 res_nodes[node] = msg
3578 for lv_name, (_, _, lv_online) in node_res.payload.items():
3579 inst = nv_dict.pop((node, lv_name), None)
3580 if not (lv_online or inst is None):
3581 res_instances.add(inst)
3583 # any leftover items in nv_dict are missing LVs, let's arrange the data
3585 for key, inst in nv_dict.iteritems():
3586 res_missing.setdefault(inst, []).append(list(key))
3588 return (res_nodes, list(res_instances), res_missing)
3591 class LUClusterRepairDiskSizes(NoHooksLU):
3592 """Verifies the cluster disks sizes.
3597 def ExpandNames(self):
3598 if self.op.instances:
3599 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3600 self.needed_locks = {
3601 locking.LEVEL_NODE_RES: [],
3602 locking.LEVEL_INSTANCE: self.wanted_names,
3604 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3606 self.wanted_names = None
3607 self.needed_locks = {
3608 locking.LEVEL_NODE_RES: locking.ALL_SET,
3609 locking.LEVEL_INSTANCE: locking.ALL_SET,
3611 self.share_locks = {
3612 locking.LEVEL_NODE_RES: 1,
3613 locking.LEVEL_INSTANCE: 0,
3616 def DeclareLocks(self, level):
3617 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3618 self._LockInstancesNodes(primary_only=True, level=level)
3620 def CheckPrereq(self):
3621 """Check prerequisites.
3623 This only checks the optional instance list against the existing names.
3626 if self.wanted_names is None:
3627 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3629 self.wanted_instances = \
3630 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3632 def _EnsureChildSizes(self, disk):
3633 """Ensure children of the disk have the needed disk size.
3635 This is valid mainly for DRBD8 and fixes an issue where the
3636 children have smaller disk size.
3638 @param disk: an L{ganeti.objects.Disk} object
3641 if disk.dev_type == constants.LD_DRBD8:
3642 assert disk.children, "Empty children for DRBD8?"
3643 fchild = disk.children[0]
3644 mismatch = fchild.size < disk.size
3646 self.LogInfo("Child disk has size %d, parent %d, fixing",
3647 fchild.size, disk.size)
3648 fchild.size = disk.size
3650 # and we recurse on this child only, not on the metadev
3651 return self._EnsureChildSizes(fchild) or mismatch
3655 def Exec(self, feedback_fn):
3656 """Verify the size of cluster disks.
3659 # TODO: check child disks too
3660 # TODO: check differences in size between primary/secondary nodes
3662 for instance in self.wanted_instances:
3663 pnode = instance.primary_node
3664 if pnode not in per_node_disks:
3665 per_node_disks[pnode] = []
3666 for idx, disk in enumerate(instance.disks):
3667 per_node_disks[pnode].append((instance, idx, disk))
3669 assert not (frozenset(per_node_disks.keys()) -
3670 self.owned_locks(locking.LEVEL_NODE_RES)), \
3671 "Not owning correct locks"
3672 assert not self.owned_locks(locking.LEVEL_NODE)
3675 for node, dskl in per_node_disks.items():
3676 newl = [v[2].Copy() for v in dskl]
3678 self.cfg.SetDiskID(dsk, node)
3679 result = self.rpc.call_blockdev_getsize(node, newl)
3681 self.LogWarning("Failure in blockdev_getsize call to node"
3682 " %s, ignoring", node)
3684 if len(result.payload) != len(dskl):
3685 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3686 " result.payload=%s", node, len(dskl), result.payload)
3687 self.LogWarning("Invalid result from node %s, ignoring node results",
3690 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3692 self.LogWarning("Disk %d of instance %s did not return size"
3693 " information, ignoring", idx, instance.name)
3695 if not isinstance(size, (int, long)):
3696 self.LogWarning("Disk %d of instance %s did not return valid"
3697 " size information, ignoring", idx, instance.name)
3700 if size != disk.size:
3701 self.LogInfo("Disk %d of instance %s has mismatched size,"
3702 " correcting: recorded %d, actual %d", idx,
3703 instance.name, disk.size, size)
3705 self.cfg.Update(instance, feedback_fn)
3706 changed.append((instance.name, idx, size))
3707 if self._EnsureChildSizes(disk):
3708 self.cfg.Update(instance, feedback_fn)
3709 changed.append((instance.name, idx, disk.size))
3713 class LUClusterRename(LogicalUnit):
3714 """Rename the cluster.
3717 HPATH = "cluster-rename"
3718 HTYPE = constants.HTYPE_CLUSTER
3720 def BuildHooksEnv(self):
3725 "OP_TARGET": self.cfg.GetClusterName(),
3726 "NEW_NAME": self.op.name,
3729 def BuildHooksNodes(self):
3730 """Build hooks nodes.
3733 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3735 def CheckPrereq(self):
3736 """Verify that the passed name is a valid one.
3739 hostname = netutils.GetHostname(name=self.op.name,
3740 family=self.cfg.GetPrimaryIPFamily())
3742 new_name = hostname.name
3743 self.ip = new_ip = hostname.ip
3744 old_name = self.cfg.GetClusterName()
3745 old_ip = self.cfg.GetMasterIP()
3746 if new_name == old_name and new_ip == old_ip:
3747 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3748 " cluster has changed",
3750 if new_ip != old_ip:
3751 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3752 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3753 " reachable on the network" %
3754 new_ip, errors.ECODE_NOTUNIQUE)
3756 self.op.name = new_name
3758 def Exec(self, feedback_fn):
3759 """Rename the cluster.
3762 clustername = self.op.name
3765 # shutdown the master IP
3766 master_params = self.cfg.GetMasterNetworkParameters()
3767 ems = self.cfg.GetUseExternalMipScript()
3768 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3770 result.Raise("Could not disable the master role")
3773 cluster = self.cfg.GetClusterInfo()
3774 cluster.cluster_name = clustername
3775 cluster.master_ip = new_ip
3776 self.cfg.Update(cluster, feedback_fn)
3778 # update the known hosts file
3779 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3780 node_list = self.cfg.GetOnlineNodeList()
3782 node_list.remove(master_params.name)
3785 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3787 master_params.ip = new_ip
3788 result = self.rpc.call_node_activate_master_ip(master_params.name,
3790 msg = result.fail_msg
3792 self.LogWarning("Could not re-enable the master role on"
3793 " the master, please restart manually: %s", msg)
3798 def _ValidateNetmask(cfg, netmask):
3799 """Checks if a netmask is valid.
3801 @type cfg: L{config.ConfigWriter}
3802 @param cfg: The cluster configuration
3804 @param netmask: the netmask to be verified
3805 @raise errors.OpPrereqError: if the validation fails
3808 ip_family = cfg.GetPrimaryIPFamily()
3810 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3811 except errors.ProgrammerError:
3812 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3813 ip_family, errors.ECODE_INVAL)
3814 if not ipcls.ValidateNetmask(netmask):
3815 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3816 (netmask), errors.ECODE_INVAL)
3819 class LUClusterSetParams(LogicalUnit):
3820 """Change the parameters of the cluster.
3823 HPATH = "cluster-modify"
3824 HTYPE = constants.HTYPE_CLUSTER
3827 def CheckArguments(self):
3831 if self.op.uid_pool:
3832 uidpool.CheckUidPool(self.op.uid_pool)
3834 if self.op.add_uids:
3835 uidpool.CheckUidPool(self.op.add_uids)
3837 if self.op.remove_uids:
3838 uidpool.CheckUidPool(self.op.remove_uids)
3840 if self.op.master_netmask is not None:
3841 _ValidateNetmask(self.cfg, self.op.master_netmask)
3843 if self.op.diskparams:
3844 for dt_params in self.op.diskparams.values():
3845 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3847 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3848 except errors.OpPrereqError, err:
3849 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3852 def ExpandNames(self):
3853 # FIXME: in the future maybe other cluster params won't require checking on
3854 # all nodes to be modified.
3855 self.needed_locks = {
3856 locking.LEVEL_NODE: locking.ALL_SET,
3857 locking.LEVEL_INSTANCE: locking.ALL_SET,
3858 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3860 self.share_locks = {
3861 locking.LEVEL_NODE: 1,
3862 locking.LEVEL_INSTANCE: 1,
3863 locking.LEVEL_NODEGROUP: 1,
3866 def BuildHooksEnv(self):
3871 "OP_TARGET": self.cfg.GetClusterName(),
3872 "NEW_VG_NAME": self.op.vg_name,
3875 def BuildHooksNodes(self):
3876 """Build hooks nodes.
3879 mn = self.cfg.GetMasterNode()
3882 def CheckPrereq(self):
3883 """Check prerequisites.
3885 This checks whether the given params don't conflict and
3886 if the given volume group is valid.
3889 if self.op.vg_name is not None and not self.op.vg_name:
3890 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3891 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3892 " instances exist", errors.ECODE_INVAL)
3894 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3895 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3896 raise errors.OpPrereqError("Cannot disable drbd helper while"
3897 " drbd-based instances exist",
3900 node_list = self.owned_locks(locking.LEVEL_NODE)
3902 # if vg_name not None, checks given volume group on all nodes
3904 vglist = self.rpc.call_vg_list(node_list)
3905 for node in node_list:
3906 msg = vglist[node].fail_msg
3908 # ignoring down node
3909 self.LogWarning("Error while gathering data on node %s"
3910 " (ignoring node): %s", node, msg)
3912 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3914 constants.MIN_VG_SIZE)
3916 raise errors.OpPrereqError("Error on node '%s': %s" %
3917 (node, vgstatus), errors.ECODE_ENVIRON)
3919 if self.op.drbd_helper:
3920 # checks given drbd helper on all nodes
3921 helpers = self.rpc.call_drbd_helper(node_list)
3922 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3924 self.LogInfo("Not checking drbd helper on offline node %s", node)
3926 msg = helpers[node].fail_msg
3928 raise errors.OpPrereqError("Error checking drbd helper on node"
3929 " '%s': %s" % (node, msg),
3930 errors.ECODE_ENVIRON)
3931 node_helper = helpers[node].payload
3932 if node_helper != self.op.drbd_helper:
3933 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3934 (node, node_helper), errors.ECODE_ENVIRON)
3936 self.cluster = cluster = self.cfg.GetClusterInfo()
3937 # validate params changes
3938 if self.op.beparams:
3939 objects.UpgradeBeParams(self.op.beparams)
3940 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3941 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3943 if self.op.ndparams:
3944 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3945 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3947 # TODO: we need a more general way to handle resetting
3948 # cluster-level parameters to default values
3949 if self.new_ndparams["oob_program"] == "":
3950 self.new_ndparams["oob_program"] = \
3951 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3953 if self.op.hv_state:
3954 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3955 self.cluster.hv_state_static)
3956 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3957 for hv, values in new_hv_state.items())
3959 if self.op.disk_state:
3960 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3961 self.cluster.disk_state_static)
3962 self.new_disk_state = \
3963 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3964 for name, values in svalues.items()))
3965 for storage, svalues in new_disk_state.items())
3968 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3971 all_instances = self.cfg.GetAllInstancesInfo().values()
3973 for group in self.cfg.GetAllNodeGroupsInfo().values():
3974 instances = frozenset([inst for inst in all_instances
3975 if compat.any(node in group.members
3976 for node in inst.all_nodes)])
3977 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3978 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
3979 new = _ComputeNewInstanceViolations(ipol,
3980 new_ipolicy, instances)
3982 violations.update(new)
3985 self.LogWarning("After the ipolicy change the following instances"
3986 " violate them: %s",
3987 utils.CommaJoin(utils.NiceSort(violations)))
3989 if self.op.nicparams:
3990 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3991 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3992 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3995 # check all instances for consistency
3996 for instance in self.cfg.GetAllInstancesInfo().values():
3997 for nic_idx, nic in enumerate(instance.nics):
3998 params_copy = copy.deepcopy(nic.nicparams)
3999 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4001 # check parameter syntax
4003 objects.NIC.CheckParameterSyntax(params_filled)
4004 except errors.ConfigurationError, err:
4005 nic_errors.append("Instance %s, nic/%d: %s" %
4006 (instance.name, nic_idx, err))
4008 # if we're moving instances to routed, check that they have an ip
4009 target_mode = params_filled[constants.NIC_MODE]
4010 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4011 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4012 " address" % (instance.name, nic_idx))
4014 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4015 "\n".join(nic_errors), errors.ECODE_INVAL)
4017 # hypervisor list/parameters
4018 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4019 if self.op.hvparams:
4020 for hv_name, hv_dict in self.op.hvparams.items():
4021 if hv_name not in self.new_hvparams:
4022 self.new_hvparams[hv_name] = hv_dict
4024 self.new_hvparams[hv_name].update(hv_dict)
4026 # disk template parameters
4027 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4028 if self.op.diskparams:
4029 for dt_name, dt_params in self.op.diskparams.items():
4030 if dt_name not in self.op.diskparams:
4031 self.new_diskparams[dt_name] = dt_params
4033 self.new_diskparams[dt_name].update(dt_params)
4035 # os hypervisor parameters
4036 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4038 for os_name, hvs in self.op.os_hvp.items():
4039 if os_name not in self.new_os_hvp:
4040 self.new_os_hvp[os_name] = hvs
4042 for hv_name, hv_dict in hvs.items():
4043 if hv_name not in self.new_os_hvp[os_name]:
4044 self.new_os_hvp[os_name][hv_name] = hv_dict
4046 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4049 self.new_osp = objects.FillDict(cluster.osparams, {})
4050 if self.op.osparams:
4051 for os_name, osp in self.op.osparams.items():
4052 if os_name not in self.new_osp:
4053 self.new_osp[os_name] = {}
4055 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4058 if not self.new_osp[os_name]:
4059 # we removed all parameters
4060 del self.new_osp[os_name]
4062 # check the parameter validity (remote check)
4063 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4064 os_name, self.new_osp[os_name])
4066 # changes to the hypervisor list
4067 if self.op.enabled_hypervisors is not None:
4068 self.hv_list = self.op.enabled_hypervisors
4069 for hv in self.hv_list:
4070 # if the hypervisor doesn't already exist in the cluster
4071 # hvparams, we initialize it to empty, and then (in both
4072 # cases) we make sure to fill the defaults, as we might not
4073 # have a complete defaults list if the hypervisor wasn't
4075 if hv not in new_hvp:
4077 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4078 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4080 self.hv_list = cluster.enabled_hypervisors
4082 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4083 # either the enabled list has changed, or the parameters have, validate
4084 for hv_name, hv_params in self.new_hvparams.items():
4085 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4086 (self.op.enabled_hypervisors and
4087 hv_name in self.op.enabled_hypervisors)):
4088 # either this is a new hypervisor, or its parameters have changed
4089 hv_class = hypervisor.GetHypervisor(hv_name)
4090 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4091 hv_class.CheckParameterSyntax(hv_params)
4092 _CheckHVParams(self, node_list, hv_name, hv_params)
4095 # no need to check any newly-enabled hypervisors, since the
4096 # defaults have already been checked in the above code-block
4097 for os_name, os_hvp in self.new_os_hvp.items():
4098 for hv_name, hv_params in os_hvp.items():
4099 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4100 # we need to fill in the new os_hvp on top of the actual hv_p
4101 cluster_defaults = self.new_hvparams.get(hv_name, {})
4102 new_osp = objects.FillDict(cluster_defaults, hv_params)
4103 hv_class = hypervisor.GetHypervisor(hv_name)
4104 hv_class.CheckParameterSyntax(new_osp)
4105 _CheckHVParams(self, node_list, hv_name, new_osp)
4107 if self.op.default_iallocator:
4108 alloc_script = utils.FindFile(self.op.default_iallocator,
4109 constants.IALLOCATOR_SEARCH_PATH,
4111 if alloc_script is None:
4112 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4113 " specified" % self.op.default_iallocator,
4116 def Exec(self, feedback_fn):
4117 """Change the parameters of the cluster.
4120 if self.op.vg_name is not None:
4121 new_volume = self.op.vg_name
4124 if new_volume != self.cfg.GetVGName():
4125 self.cfg.SetVGName(new_volume)
4127 feedback_fn("Cluster LVM configuration already in desired"
4128 " state, not changing")
4129 if self.op.drbd_helper is not None:
4130 new_helper = self.op.drbd_helper
4133 if new_helper != self.cfg.GetDRBDHelper():
4134 self.cfg.SetDRBDHelper(new_helper)
4136 feedback_fn("Cluster DRBD helper already in desired state,"
4138 if self.op.hvparams:
4139 self.cluster.hvparams = self.new_hvparams
4141 self.cluster.os_hvp = self.new_os_hvp
4142 if self.op.enabled_hypervisors is not None:
4143 self.cluster.hvparams = self.new_hvparams
4144 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4145 if self.op.beparams:
4146 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4147 if self.op.nicparams:
4148 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4150 self.cluster.ipolicy = self.new_ipolicy
4151 if self.op.osparams:
4152 self.cluster.osparams = self.new_osp
4153 if self.op.ndparams:
4154 self.cluster.ndparams = self.new_ndparams
4155 if self.op.diskparams:
4156 self.cluster.diskparams = self.new_diskparams
4157 if self.op.hv_state:
4158 self.cluster.hv_state_static = self.new_hv_state
4159 if self.op.disk_state:
4160 self.cluster.disk_state_static = self.new_disk_state
4162 if self.op.candidate_pool_size is not None:
4163 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4164 # we need to update the pool size here, otherwise the save will fail
4165 _AdjustCandidatePool(self, [])
4167 if self.op.maintain_node_health is not None:
4168 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4169 feedback_fn("Note: CONFD was disabled at build time, node health"
4170 " maintenance is not useful (still enabling it)")
4171 self.cluster.maintain_node_health = self.op.maintain_node_health
4173 if self.op.prealloc_wipe_disks is not None:
4174 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4176 if self.op.add_uids is not None:
4177 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4179 if self.op.remove_uids is not None:
4180 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4182 if self.op.uid_pool is not None:
4183 self.cluster.uid_pool = self.op.uid_pool
4185 if self.op.default_iallocator is not None:
4186 self.cluster.default_iallocator = self.op.default_iallocator
4188 if self.op.reserved_lvs is not None:
4189 self.cluster.reserved_lvs = self.op.reserved_lvs
4191 if self.op.use_external_mip_script is not None:
4192 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4194 def helper_os(aname, mods, desc):
4196 lst = getattr(self.cluster, aname)
4197 for key, val in mods:
4198 if key == constants.DDM_ADD:
4200 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4203 elif key == constants.DDM_REMOVE:
4207 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4209 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4211 if self.op.hidden_os:
4212 helper_os("hidden_os", self.op.hidden_os, "hidden")
4214 if self.op.blacklisted_os:
4215 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4217 if self.op.master_netdev:
4218 master_params = self.cfg.GetMasterNetworkParameters()
4219 ems = self.cfg.GetUseExternalMipScript()
4220 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4221 self.cluster.master_netdev)
4222 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4224 result.Raise("Could not disable the master ip")
4225 feedback_fn("Changing master_netdev from %s to %s" %
4226 (master_params.netdev, self.op.master_netdev))
4227 self.cluster.master_netdev = self.op.master_netdev
4229 if self.op.master_netmask:
4230 master_params = self.cfg.GetMasterNetworkParameters()
4231 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4232 result = self.rpc.call_node_change_master_netmask(master_params.name,
4233 master_params.netmask,
4234 self.op.master_netmask,
4236 master_params.netdev)
4238 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4241 self.cluster.master_netmask = self.op.master_netmask
4243 self.cfg.Update(self.cluster, feedback_fn)
4245 if self.op.master_netdev:
4246 master_params = self.cfg.GetMasterNetworkParameters()
4247 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4248 self.op.master_netdev)
4249 ems = self.cfg.GetUseExternalMipScript()
4250 result = self.rpc.call_node_activate_master_ip(master_params.name,
4253 self.LogWarning("Could not re-enable the master ip on"
4254 " the master, please restart manually: %s",
4258 def _UploadHelper(lu, nodes, fname):
4259 """Helper for uploading a file and showing warnings.
4262 if os.path.exists(fname):
4263 result = lu.rpc.call_upload_file(nodes, fname)
4264 for to_node, to_result in result.items():
4265 msg = to_result.fail_msg
4267 msg = ("Copy of file %s to node %s failed: %s" %
4268 (fname, to_node, msg))
4269 lu.proc.LogWarning(msg)
4272 def _ComputeAncillaryFiles(cluster, redist):
4273 """Compute files external to Ganeti which need to be consistent.
4275 @type redist: boolean
4276 @param redist: Whether to include files which need to be redistributed
4279 # Compute files for all nodes
4281 constants.SSH_KNOWN_HOSTS_FILE,
4282 constants.CONFD_HMAC_KEY,
4283 constants.CLUSTER_DOMAIN_SECRET_FILE,
4284 constants.SPICE_CERT_FILE,
4285 constants.SPICE_CACERT_FILE,
4286 constants.RAPI_USERS_FILE,
4290 files_all.update(constants.ALL_CERT_FILES)
4291 files_all.update(ssconf.SimpleStore().GetFileList())
4293 # we need to ship at least the RAPI certificate
4294 files_all.add(constants.RAPI_CERT_FILE)
4296 if cluster.modify_etc_hosts:
4297 files_all.add(constants.ETC_HOSTS)
4299 if cluster.use_external_mip_script:
4300 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4302 # Files which are optional, these must:
4303 # - be present in one other category as well
4304 # - either exist or not exist on all nodes of that category (mc, vm all)
4306 constants.RAPI_USERS_FILE,
4309 # Files which should only be on master candidates
4313 files_mc.add(constants.CLUSTER_CONF_FILE)
4315 # Files which should only be on VM-capable nodes
4318 for hv_name in cluster.enabled_hypervisors
4319 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4323 for hv_name in cluster.enabled_hypervisors
4324 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4326 # Filenames in each category must be unique
4327 all_files_set = files_all | files_mc | files_vm
4328 assert (len(all_files_set) ==
4329 sum(map(len, [files_all, files_mc, files_vm]))), \
4330 "Found file listed in more than one file list"
4332 # Optional files must be present in one other category
4333 assert all_files_set.issuperset(files_opt), \
4334 "Optional file not in a different required list"
4336 return (files_all, files_opt, files_mc, files_vm)
4339 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4340 """Distribute additional files which are part of the cluster configuration.
4342 ConfigWriter takes care of distributing the config and ssconf files, but
4343 there are more files which should be distributed to all nodes. This function
4344 makes sure those are copied.
4346 @param lu: calling logical unit
4347 @param additional_nodes: list of nodes not in the config to distribute to
4348 @type additional_vm: boolean
4349 @param additional_vm: whether the additional nodes are vm-capable or not
4352 # Gather target nodes
4353 cluster = lu.cfg.GetClusterInfo()
4354 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4356 online_nodes = lu.cfg.GetOnlineNodeList()
4357 online_set = frozenset(online_nodes)
4358 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4360 if additional_nodes is not None:
4361 online_nodes.extend(additional_nodes)
4363 vm_nodes.extend(additional_nodes)
4365 # Never distribute to master node
4366 for nodelist in [online_nodes, vm_nodes]:
4367 if master_info.name in nodelist:
4368 nodelist.remove(master_info.name)
4371 (files_all, _, files_mc, files_vm) = \
4372 _ComputeAncillaryFiles(cluster, True)
4374 # Never re-distribute configuration file from here
4375 assert not (constants.CLUSTER_CONF_FILE in files_all or
4376 constants.CLUSTER_CONF_FILE in files_vm)
4377 assert not files_mc, "Master candidates not handled in this function"
4380 (online_nodes, files_all),
4381 (vm_nodes, files_vm),
4385 for (node_list, files) in filemap:
4387 _UploadHelper(lu, node_list, fname)
4390 class LUClusterRedistConf(NoHooksLU):
4391 """Force the redistribution of cluster configuration.
4393 This is a very simple LU.
4398 def ExpandNames(self):
4399 self.needed_locks = {
4400 locking.LEVEL_NODE: locking.ALL_SET,
4402 self.share_locks[locking.LEVEL_NODE] = 1
4404 def Exec(self, feedback_fn):
4405 """Redistribute the configuration.
4408 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4409 _RedistributeAncillaryFiles(self)
4412 class LUClusterActivateMasterIp(NoHooksLU):
4413 """Activate the master IP on the master node.
4416 def Exec(self, feedback_fn):
4417 """Activate the master IP.
4420 master_params = self.cfg.GetMasterNetworkParameters()
4421 ems = self.cfg.GetUseExternalMipScript()
4422 result = self.rpc.call_node_activate_master_ip(master_params.name,
4424 result.Raise("Could not activate the master IP")
4427 class LUClusterDeactivateMasterIp(NoHooksLU):
4428 """Deactivate the master IP on the master node.
4431 def Exec(self, feedback_fn):
4432 """Deactivate the master IP.
4435 master_params = self.cfg.GetMasterNetworkParameters()
4436 ems = self.cfg.GetUseExternalMipScript()
4437 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4439 result.Raise("Could not deactivate the master IP")
4442 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4443 """Sleep and poll for an instance's disk to sync.
4446 if not instance.disks or disks is not None and not disks:
4449 disks = _ExpandCheckDisks(instance, disks)
4452 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4454 node = instance.primary_node
4457 lu.cfg.SetDiskID(dev, node)
4459 # TODO: Convert to utils.Retry
4462 degr_retries = 10 # in seconds, as we sleep 1 second each time
4466 cumul_degraded = False
4467 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4468 msg = rstats.fail_msg
4470 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4473 raise errors.RemoteError("Can't contact node %s for mirror data,"
4474 " aborting." % node)
4477 rstats = rstats.payload
4479 for i, mstat in enumerate(rstats):
4481 lu.LogWarning("Can't compute data for node %s/%s",
4482 node, disks[i].iv_name)
4485 cumul_degraded = (cumul_degraded or
4486 (mstat.is_degraded and mstat.sync_percent is None))
4487 if mstat.sync_percent is not None:
4489 if mstat.estimated_time is not None:
4490 rem_time = ("%s remaining (estimated)" %
4491 utils.FormatSeconds(mstat.estimated_time))
4492 max_time = mstat.estimated_time
4494 rem_time = "no time estimate"
4495 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4496 (disks[i].iv_name, mstat.sync_percent, rem_time))
4498 # if we're done but degraded, let's do a few small retries, to
4499 # make sure we see a stable and not transient situation; therefore
4500 # we force restart of the loop
4501 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4502 logging.info("Degraded disks found, %d retries left", degr_retries)
4510 time.sleep(min(60, max_time))
4513 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4514 return not cumul_degraded
4517 def _BlockdevFind(lu, node, dev, instance):
4518 """Wrapper around call_blockdev_find to annotate diskparams.
4520 @param lu: A reference to the lu object
4521 @param node: The node to call out
4522 @param dev: The device to find
4523 @param instance: The instance object the device belongs to
4524 @returns The result of the rpc call
4527 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4528 return lu.rpc.call_blockdev_find(node, disk)
4531 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4532 """Wrapper around L{_CheckDiskConsistencyInner}.
4535 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4536 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4540 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4542 """Check that mirrors are not degraded.
4544 @attention: The device has to be annotated already.
4546 The ldisk parameter, if True, will change the test from the
4547 is_degraded attribute (which represents overall non-ok status for
4548 the device(s)) to the ldisk (representing the local storage status).
4551 lu.cfg.SetDiskID(dev, node)
4555 if on_primary or dev.AssembleOnSecondary():
4556 rstats = lu.rpc.call_blockdev_find(node, dev)
4557 msg = rstats.fail_msg
4559 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4561 elif not rstats.payload:
4562 lu.LogWarning("Can't find disk on node %s", node)
4566 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4568 result = result and not rstats.payload.is_degraded
4571 for child in dev.children:
4572 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4578 class LUOobCommand(NoHooksLU):
4579 """Logical unit for OOB handling.
4583 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4585 def ExpandNames(self):
4586 """Gather locks we need.
4589 if self.op.node_names:
4590 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4591 lock_names = self.op.node_names
4593 lock_names = locking.ALL_SET
4595 self.needed_locks = {
4596 locking.LEVEL_NODE: lock_names,
4599 def CheckPrereq(self):
4600 """Check prerequisites.
4603 - the node exists in the configuration
4606 Any errors are signaled by raising errors.OpPrereqError.
4610 self.master_node = self.cfg.GetMasterNode()
4612 assert self.op.power_delay >= 0.0
4614 if self.op.node_names:
4615 if (self.op.command in self._SKIP_MASTER and
4616 self.master_node in self.op.node_names):
4617 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4618 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4620 if master_oob_handler:
4621 additional_text = ("run '%s %s %s' if you want to operate on the"
4622 " master regardless") % (master_oob_handler,
4626 additional_text = "it does not support out-of-band operations"
4628 raise errors.OpPrereqError(("Operating on the master node %s is not"
4629 " allowed for %s; %s") %
4630 (self.master_node, self.op.command,
4631 additional_text), errors.ECODE_INVAL)
4633 self.op.node_names = self.cfg.GetNodeList()
4634 if self.op.command in self._SKIP_MASTER:
4635 self.op.node_names.remove(self.master_node)
4637 if self.op.command in self._SKIP_MASTER:
4638 assert self.master_node not in self.op.node_names
4640 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4642 raise errors.OpPrereqError("Node %s not found" % node_name,
4645 self.nodes.append(node)
4647 if (not self.op.ignore_status and
4648 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4649 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4650 " not marked offline") % node_name,
4653 def Exec(self, feedback_fn):
4654 """Execute OOB and return result if we expect any.
4657 master_node = self.master_node
4660 for idx, node in enumerate(utils.NiceSort(self.nodes,
4661 key=lambda node: node.name)):
4662 node_entry = [(constants.RS_NORMAL, node.name)]
4663 ret.append(node_entry)
4665 oob_program = _SupportsOob(self.cfg, node)
4668 node_entry.append((constants.RS_UNAVAIL, None))
4671 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4672 self.op.command, oob_program, node.name)
4673 result = self.rpc.call_run_oob(master_node, oob_program,
4674 self.op.command, node.name,
4678 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4679 node.name, result.fail_msg)
4680 node_entry.append((constants.RS_NODATA, None))
4683 self._CheckPayload(result)
4684 except errors.OpExecError, err:
4685 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4687 node_entry.append((constants.RS_NODATA, None))
4689 if self.op.command == constants.OOB_HEALTH:
4690 # For health we should log important events
4691 for item, status in result.payload:
4692 if status in [constants.OOB_STATUS_WARNING,
4693 constants.OOB_STATUS_CRITICAL]:
4694 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4695 item, node.name, status)
4697 if self.op.command == constants.OOB_POWER_ON:
4699 elif self.op.command == constants.OOB_POWER_OFF:
4700 node.powered = False
4701 elif self.op.command == constants.OOB_POWER_STATUS:
4702 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4703 if powered != node.powered:
4704 logging.warning(("Recorded power state (%s) of node '%s' does not"
4705 " match actual power state (%s)"), node.powered,
4708 # For configuration changing commands we should update the node
4709 if self.op.command in (constants.OOB_POWER_ON,
4710 constants.OOB_POWER_OFF):
4711 self.cfg.Update(node, feedback_fn)
4713 node_entry.append((constants.RS_NORMAL, result.payload))
4715 if (self.op.command == constants.OOB_POWER_ON and
4716 idx < len(self.nodes) - 1):
4717 time.sleep(self.op.power_delay)
4721 def _CheckPayload(self, result):
4722 """Checks if the payload is valid.
4724 @param result: RPC result
4725 @raises errors.OpExecError: If payload is not valid
4729 if self.op.command == constants.OOB_HEALTH:
4730 if not isinstance(result.payload, list):
4731 errs.append("command 'health' is expected to return a list but got %s" %
4732 type(result.payload))
4734 for item, status in result.payload:
4735 if status not in constants.OOB_STATUSES:
4736 errs.append("health item '%s' has invalid status '%s'" %
4739 if self.op.command == constants.OOB_POWER_STATUS:
4740 if not isinstance(result.payload, dict):
4741 errs.append("power-status is expected to return a dict but got %s" %
4742 type(result.payload))
4744 if self.op.command in [
4745 constants.OOB_POWER_ON,
4746 constants.OOB_POWER_OFF,
4747 constants.OOB_POWER_CYCLE,
4749 if result.payload is not None:
4750 errs.append("%s is expected to not return payload but got '%s'" %
4751 (self.op.command, result.payload))
4754 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4755 utils.CommaJoin(errs))
4758 class _OsQuery(_QueryBase):
4759 FIELDS = query.OS_FIELDS
4761 def ExpandNames(self, lu):
4762 # Lock all nodes in shared mode
4763 # Temporary removal of locks, should be reverted later
4764 # TODO: reintroduce locks when they are lighter-weight
4765 lu.needed_locks = {}
4766 #self.share_locks[locking.LEVEL_NODE] = 1
4767 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4769 # The following variables interact with _QueryBase._GetNames
4771 self.wanted = self.names
4773 self.wanted = locking.ALL_SET
4775 self.do_locking = self.use_locking
4777 def DeclareLocks(self, lu, level):
4781 def _DiagnoseByOS(rlist):
4782 """Remaps a per-node return list into an a per-os per-node dictionary
4784 @param rlist: a map with node names as keys and OS objects as values
4787 @return: a dictionary with osnames as keys and as value another
4788 map, with nodes as keys and tuples of (path, status, diagnose,
4789 variants, parameters, api_versions) as values, eg::
4791 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4792 (/srv/..., False, "invalid api")],
4793 "node2": [(/srv/..., True, "", [], [])]}
4798 # we build here the list of nodes that didn't fail the RPC (at RPC
4799 # level), so that nodes with a non-responding node daemon don't
4800 # make all OSes invalid
4801 good_nodes = [node_name for node_name in rlist
4802 if not rlist[node_name].fail_msg]
4803 for node_name, nr in rlist.items():
4804 if nr.fail_msg or not nr.payload:
4806 for (name, path, status, diagnose, variants,
4807 params, api_versions) in nr.payload:
4808 if name not in all_os:
4809 # build a list of nodes for this os containing empty lists
4810 # for each node in node_list
4812 for nname in good_nodes:
4813 all_os[name][nname] = []
4814 # convert params from [name, help] to (name, help)
4815 params = [tuple(v) for v in params]
4816 all_os[name][node_name].append((path, status, diagnose,
4817 variants, params, api_versions))
4820 def _GetQueryData(self, lu):
4821 """Computes the list of nodes and their attributes.
4824 # Locking is not used
4825 assert not (compat.any(lu.glm.is_owned(level)
4826 for level in locking.LEVELS
4827 if level != locking.LEVEL_CLUSTER) or
4828 self.do_locking or self.use_locking)
4830 valid_nodes = [node.name
4831 for node in lu.cfg.GetAllNodesInfo().values()
4832 if not node.offline and node.vm_capable]
4833 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4834 cluster = lu.cfg.GetClusterInfo()
4838 for (os_name, os_data) in pol.items():
4839 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4840 hidden=(os_name in cluster.hidden_os),
4841 blacklisted=(os_name in cluster.blacklisted_os))
4845 api_versions = set()
4847 for idx, osl in enumerate(os_data.values()):
4848 info.valid = bool(info.valid and osl and osl[0][1])
4852 (node_variants, node_params, node_api) = osl[0][3:6]
4855 variants.update(node_variants)
4856 parameters.update(node_params)
4857 api_versions.update(node_api)
4859 # Filter out inconsistent values
4860 variants.intersection_update(node_variants)
4861 parameters.intersection_update(node_params)
4862 api_versions.intersection_update(node_api)
4864 info.variants = list(variants)
4865 info.parameters = list(parameters)
4866 info.api_versions = list(api_versions)
4868 data[os_name] = info
4870 # Prepare data in requested order
4871 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4875 class LUOsDiagnose(NoHooksLU):
4876 """Logical unit for OS diagnose/query.
4882 def _BuildFilter(fields, names):
4883 """Builds a filter for querying OSes.
4886 name_filter = qlang.MakeSimpleFilter("name", names)
4888 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4889 # respective field is not requested
4890 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4891 for fname in ["hidden", "blacklisted"]
4892 if fname not in fields]
4893 if "valid" not in fields:
4894 status_filter.append([qlang.OP_TRUE, "valid"])
4897 status_filter.insert(0, qlang.OP_AND)
4899 status_filter = None
4901 if name_filter and status_filter:
4902 return [qlang.OP_AND, name_filter, status_filter]
4906 return status_filter
4908 def CheckArguments(self):
4909 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4910 self.op.output_fields, False)
4912 def ExpandNames(self):
4913 self.oq.ExpandNames(self)
4915 def Exec(self, feedback_fn):
4916 return self.oq.OldStyleQuery(self)
4919 class LUNodeRemove(LogicalUnit):
4920 """Logical unit for removing a node.
4923 HPATH = "node-remove"
4924 HTYPE = constants.HTYPE_NODE
4926 def BuildHooksEnv(self):
4931 "OP_TARGET": self.op.node_name,
4932 "NODE_NAME": self.op.node_name,
4935 def BuildHooksNodes(self):
4936 """Build hooks nodes.
4938 This doesn't run on the target node in the pre phase as a failed
4939 node would then be impossible to remove.
4942 all_nodes = self.cfg.GetNodeList()
4944 all_nodes.remove(self.op.node_name)
4947 return (all_nodes, all_nodes)
4949 def CheckPrereq(self):
4950 """Check prerequisites.
4953 - the node exists in the configuration
4954 - it does not have primary or secondary instances
4955 - it's not the master
4957 Any errors are signaled by raising errors.OpPrereqError.
4960 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4961 node = self.cfg.GetNodeInfo(self.op.node_name)
4962 assert node is not None
4964 masternode = self.cfg.GetMasterNode()
4965 if node.name == masternode:
4966 raise errors.OpPrereqError("Node is the master node, failover to another"
4967 " node is required", errors.ECODE_INVAL)
4969 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4970 if node.name in instance.all_nodes:
4971 raise errors.OpPrereqError("Instance %s is still running on the node,"
4972 " please remove first" % instance_name,
4974 self.op.node_name = node.name
4977 def Exec(self, feedback_fn):
4978 """Removes the node from the cluster.
4982 logging.info("Stopping the node daemon and removing configs from node %s",
4985 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4987 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4990 # Promote nodes to master candidate as needed
4991 _AdjustCandidatePool(self, exceptions=[node.name])
4992 self.context.RemoveNode(node.name)
4994 # Run post hooks on the node before it's removed
4995 _RunPostHook(self, node.name)
4997 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4998 msg = result.fail_msg
5000 self.LogWarning("Errors encountered on the remote node while leaving"
5001 " the cluster: %s", msg)
5003 # Remove node from our /etc/hosts
5004 if self.cfg.GetClusterInfo().modify_etc_hosts:
5005 master_node = self.cfg.GetMasterNode()
5006 result = self.rpc.call_etc_hosts_modify(master_node,
5007 constants.ETC_HOSTS_REMOVE,
5009 result.Raise("Can't update hosts file with new host data")
5010 _RedistributeAncillaryFiles(self)
5013 class _NodeQuery(_QueryBase):
5014 FIELDS = query.NODE_FIELDS
5016 def ExpandNames(self, lu):
5017 lu.needed_locks = {}
5018 lu.share_locks = _ShareAll()
5021 self.wanted = _GetWantedNodes(lu, self.names)
5023 self.wanted = locking.ALL_SET
5025 self.do_locking = (self.use_locking and
5026 query.NQ_LIVE in self.requested_data)
5029 # If any non-static field is requested we need to lock the nodes
5030 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5032 def DeclareLocks(self, lu, level):
5035 def _GetQueryData(self, lu):
5036 """Computes the list of nodes and their attributes.
5039 all_info = lu.cfg.GetAllNodesInfo()
5041 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5043 # Gather data as requested
5044 if query.NQ_LIVE in self.requested_data:
5045 # filter out non-vm_capable nodes
5046 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5048 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5049 [lu.cfg.GetHypervisorType()])
5050 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5051 for (name, nresult) in node_data.items()
5052 if not nresult.fail_msg and nresult.payload)
5056 if query.NQ_INST in self.requested_data:
5057 node_to_primary = dict([(name, set()) for name in nodenames])
5058 node_to_secondary = dict([(name, set()) for name in nodenames])
5060 inst_data = lu.cfg.GetAllInstancesInfo()
5062 for inst in inst_data.values():
5063 if inst.primary_node in node_to_primary:
5064 node_to_primary[inst.primary_node].add(inst.name)
5065 for secnode in inst.secondary_nodes:
5066 if secnode in node_to_secondary:
5067 node_to_secondary[secnode].add(inst.name)
5069 node_to_primary = None
5070 node_to_secondary = None
5072 if query.NQ_OOB in self.requested_data:
5073 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5074 for name, node in all_info.iteritems())
5078 if query.NQ_GROUP in self.requested_data:
5079 groups = lu.cfg.GetAllNodeGroupsInfo()
5083 return query.NodeQueryData([all_info[name] for name in nodenames],
5084 live_data, lu.cfg.GetMasterNode(),
5085 node_to_primary, node_to_secondary, groups,
5086 oob_support, lu.cfg.GetClusterInfo())
5089 class LUNodeQuery(NoHooksLU):
5090 """Logical unit for querying nodes.
5093 # pylint: disable=W0142
5096 def CheckArguments(self):
5097 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5098 self.op.output_fields, self.op.use_locking)
5100 def ExpandNames(self):
5101 self.nq.ExpandNames(self)
5103 def DeclareLocks(self, level):
5104 self.nq.DeclareLocks(self, level)
5106 def Exec(self, feedback_fn):
5107 return self.nq.OldStyleQuery(self)
5110 class LUNodeQueryvols(NoHooksLU):
5111 """Logical unit for getting volumes on node(s).
5115 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5116 _FIELDS_STATIC = utils.FieldSet("node")
5118 def CheckArguments(self):
5119 _CheckOutputFields(static=self._FIELDS_STATIC,
5120 dynamic=self._FIELDS_DYNAMIC,
5121 selected=self.op.output_fields)
5123 def ExpandNames(self):
5124 self.share_locks = _ShareAll()
5125 self.needed_locks = {}
5127 if not self.op.nodes:
5128 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5130 self.needed_locks[locking.LEVEL_NODE] = \
5131 _GetWantedNodes(self, self.op.nodes)
5133 def Exec(self, feedback_fn):
5134 """Computes the list of nodes and their attributes.
5137 nodenames = self.owned_locks(locking.LEVEL_NODE)
5138 volumes = self.rpc.call_node_volumes(nodenames)
5140 ilist = self.cfg.GetAllInstancesInfo()
5141 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5144 for node in nodenames:
5145 nresult = volumes[node]
5148 msg = nresult.fail_msg
5150 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5153 node_vols = sorted(nresult.payload,
5154 key=operator.itemgetter("dev"))
5156 for vol in node_vols:
5158 for field in self.op.output_fields:
5161 elif field == "phys":
5165 elif field == "name":
5167 elif field == "size":
5168 val = int(float(vol["size"]))
5169 elif field == "instance":
5170 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5172 raise errors.ParameterError(field)
5173 node_output.append(str(val))
5175 output.append(node_output)
5180 class LUNodeQueryStorage(NoHooksLU):
5181 """Logical unit for getting information on storage units on node(s).
5184 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5187 def CheckArguments(self):
5188 _CheckOutputFields(static=self._FIELDS_STATIC,
5189 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5190 selected=self.op.output_fields)
5192 def ExpandNames(self):
5193 self.share_locks = _ShareAll()
5194 self.needed_locks = {}
5197 self.needed_locks[locking.LEVEL_NODE] = \
5198 _GetWantedNodes(self, self.op.nodes)
5200 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5202 def Exec(self, feedback_fn):
5203 """Computes the list of nodes and their attributes.
5206 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5208 # Always get name to sort by
5209 if constants.SF_NAME in self.op.output_fields:
5210 fields = self.op.output_fields[:]
5212 fields = [constants.SF_NAME] + self.op.output_fields
5214 # Never ask for node or type as it's only known to the LU
5215 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5216 while extra in fields:
5217 fields.remove(extra)
5219 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5220 name_idx = field_idx[constants.SF_NAME]
5222 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5223 data = self.rpc.call_storage_list(self.nodes,
5224 self.op.storage_type, st_args,
5225 self.op.name, fields)
5229 for node in utils.NiceSort(self.nodes):
5230 nresult = data[node]
5234 msg = nresult.fail_msg
5236 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5239 rows = dict([(row[name_idx], row) for row in nresult.payload])
5241 for name in utils.NiceSort(rows.keys()):
5246 for field in self.op.output_fields:
5247 if field == constants.SF_NODE:
5249 elif field == constants.SF_TYPE:
5250 val = self.op.storage_type
5251 elif field in field_idx:
5252 val = row[field_idx[field]]
5254 raise errors.ParameterError(field)
5263 class _InstanceQuery(_QueryBase):
5264 FIELDS = query.INSTANCE_FIELDS
5266 def ExpandNames(self, lu):
5267 lu.needed_locks = {}
5268 lu.share_locks = _ShareAll()
5271 self.wanted = _GetWantedInstances(lu, self.names)
5273 self.wanted = locking.ALL_SET
5275 self.do_locking = (self.use_locking and
5276 query.IQ_LIVE in self.requested_data)
5278 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5279 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5280 lu.needed_locks[locking.LEVEL_NODE] = []
5281 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5283 self.do_grouplocks = (self.do_locking and
5284 query.IQ_NODES in self.requested_data)
5286 def DeclareLocks(self, lu, level):
5288 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5289 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5291 # Lock all groups used by instances optimistically; this requires going
5292 # via the node before it's locked, requiring verification later on
5293 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5295 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5296 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5297 elif level == locking.LEVEL_NODE:
5298 lu._LockInstancesNodes() # pylint: disable=W0212
5301 def _CheckGroupLocks(lu):
5302 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5303 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5305 # Check if node groups for locked instances are still correct
5306 for instance_name in owned_instances:
5307 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5309 def _GetQueryData(self, lu):
5310 """Computes the list of instances and their attributes.
5313 if self.do_grouplocks:
5314 self._CheckGroupLocks(lu)
5316 cluster = lu.cfg.GetClusterInfo()
5317 all_info = lu.cfg.GetAllInstancesInfo()
5319 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5321 instance_list = [all_info[name] for name in instance_names]
5322 nodes = frozenset(itertools.chain(*(inst.all_nodes
5323 for inst in instance_list)))
5324 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5327 wrongnode_inst = set()
5329 # Gather data as requested
5330 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5332 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5334 result = node_data[name]
5336 # offline nodes will be in both lists
5337 assert result.fail_msg
5338 offline_nodes.append(name)
5340 bad_nodes.append(name)
5341 elif result.payload:
5342 for inst in result.payload:
5343 if inst in all_info:
5344 if all_info[inst].primary_node == name:
5345 live_data.update(result.payload)
5347 wrongnode_inst.add(inst)
5349 # orphan instance; we don't list it here as we don't
5350 # handle this case yet in the output of instance listing
5351 logging.warning("Orphan instance '%s' found on node %s",
5353 # else no instance is alive
5357 if query.IQ_DISKUSAGE in self.requested_data:
5358 gmi = ganeti.masterd.instance
5359 disk_usage = dict((inst.name,
5360 gmi.ComputeDiskSize(inst.disk_template,
5361 [{constants.IDISK_SIZE: disk.size}
5362 for disk in inst.disks]))
5363 for inst in instance_list)
5367 if query.IQ_CONSOLE in self.requested_data:
5369 for inst in instance_list:
5370 if inst.name in live_data:
5371 # Instance is running
5372 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5374 consinfo[inst.name] = None
5375 assert set(consinfo.keys()) == set(instance_names)
5379 if query.IQ_NODES in self.requested_data:
5380 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5382 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5383 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5384 for uuid in set(map(operator.attrgetter("group"),
5390 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5391 disk_usage, offline_nodes, bad_nodes,
5392 live_data, wrongnode_inst, consinfo,
5396 class LUQuery(NoHooksLU):
5397 """Query for resources/items of a certain kind.
5400 # pylint: disable=W0142
5403 def CheckArguments(self):
5404 qcls = _GetQueryImplementation(self.op.what)
5406 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5408 def ExpandNames(self):
5409 self.impl.ExpandNames(self)
5411 def DeclareLocks(self, level):
5412 self.impl.DeclareLocks(self, level)
5414 def Exec(self, feedback_fn):
5415 return self.impl.NewStyleQuery(self)
5418 class LUQueryFields(NoHooksLU):
5419 """Query for resources/items of a certain kind.
5422 # pylint: disable=W0142
5425 def CheckArguments(self):
5426 self.qcls = _GetQueryImplementation(self.op.what)
5428 def ExpandNames(self):
5429 self.needed_locks = {}
5431 def Exec(self, feedback_fn):
5432 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5435 class LUNodeModifyStorage(NoHooksLU):
5436 """Logical unit for modifying a storage volume on a node.
5441 def CheckArguments(self):
5442 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5444 storage_type = self.op.storage_type
5447 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5449 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5450 " modified" % storage_type,
5453 diff = set(self.op.changes.keys()) - modifiable
5455 raise errors.OpPrereqError("The following fields can not be modified for"
5456 " storage units of type '%s': %r" %
5457 (storage_type, list(diff)),
5460 def ExpandNames(self):
5461 self.needed_locks = {
5462 locking.LEVEL_NODE: self.op.node_name,
5465 def Exec(self, feedback_fn):
5466 """Computes the list of nodes and their attributes.
5469 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5470 result = self.rpc.call_storage_modify(self.op.node_name,
5471 self.op.storage_type, st_args,
5472 self.op.name, self.op.changes)
5473 result.Raise("Failed to modify storage unit '%s' on %s" %
5474 (self.op.name, self.op.node_name))
5477 class LUNodeAdd(LogicalUnit):
5478 """Logical unit for adding node to the cluster.
5482 HTYPE = constants.HTYPE_NODE
5483 _NFLAGS = ["master_capable", "vm_capable"]
5485 def CheckArguments(self):
5486 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5487 # validate/normalize the node name
5488 self.hostname = netutils.GetHostname(name=self.op.node_name,
5489 family=self.primary_ip_family)
5490 self.op.node_name = self.hostname.name
5492 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5493 raise errors.OpPrereqError("Cannot readd the master node",
5496 if self.op.readd and self.op.group:
5497 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5498 " being readded", errors.ECODE_INVAL)
5500 def BuildHooksEnv(self):
5503 This will run on all nodes before, and on all nodes + the new node after.
5507 "OP_TARGET": self.op.node_name,
5508 "NODE_NAME": self.op.node_name,
5509 "NODE_PIP": self.op.primary_ip,
5510 "NODE_SIP": self.op.secondary_ip,
5511 "MASTER_CAPABLE": str(self.op.master_capable),
5512 "VM_CAPABLE": str(self.op.vm_capable),
5515 def BuildHooksNodes(self):
5516 """Build hooks nodes.
5519 # Exclude added node
5520 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5521 post_nodes = pre_nodes + [self.op.node_name, ]
5523 return (pre_nodes, post_nodes)
5525 def CheckPrereq(self):
5526 """Check prerequisites.
5529 - the new node is not already in the config
5531 - its parameters (single/dual homed) matches the cluster
5533 Any errors are signaled by raising errors.OpPrereqError.
5537 hostname = self.hostname
5538 node = hostname.name
5539 primary_ip = self.op.primary_ip = hostname.ip
5540 if self.op.secondary_ip is None:
5541 if self.primary_ip_family == netutils.IP6Address.family:
5542 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5543 " IPv4 address must be given as secondary",
5545 self.op.secondary_ip = primary_ip
5547 secondary_ip = self.op.secondary_ip
5548 if not netutils.IP4Address.IsValid(secondary_ip):
5549 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5550 " address" % secondary_ip, errors.ECODE_INVAL)
5552 node_list = cfg.GetNodeList()
5553 if not self.op.readd and node in node_list:
5554 raise errors.OpPrereqError("Node %s is already in the configuration" %
5555 node, errors.ECODE_EXISTS)
5556 elif self.op.readd and node not in node_list:
5557 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5560 self.changed_primary_ip = False
5562 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5563 if self.op.readd and node == existing_node_name:
5564 if existing_node.secondary_ip != secondary_ip:
5565 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5566 " address configuration as before",
5568 if existing_node.primary_ip != primary_ip:
5569 self.changed_primary_ip = True
5573 if (existing_node.primary_ip == primary_ip or
5574 existing_node.secondary_ip == primary_ip or
5575 existing_node.primary_ip == secondary_ip or
5576 existing_node.secondary_ip == secondary_ip):
5577 raise errors.OpPrereqError("New node ip address(es) conflict with"
5578 " existing node %s" % existing_node.name,
5579 errors.ECODE_NOTUNIQUE)
5581 # After this 'if' block, None is no longer a valid value for the
5582 # _capable op attributes
5584 old_node = self.cfg.GetNodeInfo(node)
5585 assert old_node is not None, "Can't retrieve locked node %s" % node
5586 for attr in self._NFLAGS:
5587 if getattr(self.op, attr) is None:
5588 setattr(self.op, attr, getattr(old_node, attr))
5590 for attr in self._NFLAGS:
5591 if getattr(self.op, attr) is None:
5592 setattr(self.op, attr, True)
5594 if self.op.readd and not self.op.vm_capable:
5595 pri, sec = cfg.GetNodeInstances(node)
5597 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5598 " flag set to false, but it already holds"
5599 " instances" % node,
5602 # check that the type of the node (single versus dual homed) is the
5603 # same as for the master
5604 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5605 master_singlehomed = myself.secondary_ip == myself.primary_ip
5606 newbie_singlehomed = secondary_ip == primary_ip
5607 if master_singlehomed != newbie_singlehomed:
5608 if master_singlehomed:
5609 raise errors.OpPrereqError("The master has no secondary ip but the"
5610 " new node has one",
5613 raise errors.OpPrereqError("The master has a secondary ip but the"
5614 " new node doesn't have one",
5617 # checks reachability
5618 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5619 raise errors.OpPrereqError("Node not reachable by ping",
5620 errors.ECODE_ENVIRON)
5622 if not newbie_singlehomed:
5623 # check reachability from my secondary ip to newbie's secondary ip
5624 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5625 source=myself.secondary_ip):
5626 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5627 " based ping to node daemon port",
5628 errors.ECODE_ENVIRON)
5635 if self.op.master_capable:
5636 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5638 self.master_candidate = False
5641 self.new_node = old_node
5643 node_group = cfg.LookupNodeGroup(self.op.group)
5644 self.new_node = objects.Node(name=node,
5645 primary_ip=primary_ip,
5646 secondary_ip=secondary_ip,
5647 master_candidate=self.master_candidate,
5648 offline=False, drained=False,
5651 if self.op.ndparams:
5652 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5654 if self.op.hv_state:
5655 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5657 if self.op.disk_state:
5658 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5660 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5661 # it a property on the base class.
5662 result = rpc.DnsOnlyRunner().call_version([node])[node]
5663 result.Raise("Can't get version information from node %s" % node)
5664 if constants.PROTOCOL_VERSION == result.payload:
5665 logging.info("Communication to node %s fine, sw version %s match",
5666 node, result.payload)
5668 raise errors.OpPrereqError("Version mismatch master version %s,"
5669 " node version %s" %
5670 (constants.PROTOCOL_VERSION, result.payload),
5671 errors.ECODE_ENVIRON)
5673 def Exec(self, feedback_fn):
5674 """Adds the new node to the cluster.
5677 new_node = self.new_node
5678 node = new_node.name
5680 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5683 # We adding a new node so we assume it's powered
5684 new_node.powered = True
5686 # for re-adds, reset the offline/drained/master-candidate flags;
5687 # we need to reset here, otherwise offline would prevent RPC calls
5688 # later in the procedure; this also means that if the re-add
5689 # fails, we are left with a non-offlined, broken node
5691 new_node.drained = new_node.offline = False # pylint: disable=W0201
5692 self.LogInfo("Readding a node, the offline/drained flags were reset")
5693 # if we demote the node, we do cleanup later in the procedure
5694 new_node.master_candidate = self.master_candidate
5695 if self.changed_primary_ip:
5696 new_node.primary_ip = self.op.primary_ip
5698 # copy the master/vm_capable flags
5699 for attr in self._NFLAGS:
5700 setattr(new_node, attr, getattr(self.op, attr))
5702 # notify the user about any possible mc promotion
5703 if new_node.master_candidate:
5704 self.LogInfo("Node will be a master candidate")
5706 if self.op.ndparams:
5707 new_node.ndparams = self.op.ndparams
5709 new_node.ndparams = {}
5711 if self.op.hv_state:
5712 new_node.hv_state_static = self.new_hv_state
5714 if self.op.disk_state:
5715 new_node.disk_state_static = self.new_disk_state
5717 # Add node to our /etc/hosts, and add key to known_hosts
5718 if self.cfg.GetClusterInfo().modify_etc_hosts:
5719 master_node = self.cfg.GetMasterNode()
5720 result = self.rpc.call_etc_hosts_modify(master_node,
5721 constants.ETC_HOSTS_ADD,
5724 result.Raise("Can't update hosts file with new host data")
5726 if new_node.secondary_ip != new_node.primary_ip:
5727 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5730 node_verify_list = [self.cfg.GetMasterNode()]
5731 node_verify_param = {
5732 constants.NV_NODELIST: ([node], {}),
5733 # TODO: do a node-net-test as well?
5736 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5737 self.cfg.GetClusterName())
5738 for verifier in node_verify_list:
5739 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5740 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5742 for failed in nl_payload:
5743 feedback_fn("ssh/hostname verification failed"
5744 " (checking from %s): %s" %
5745 (verifier, nl_payload[failed]))
5746 raise errors.OpExecError("ssh/hostname verification failed")
5749 _RedistributeAncillaryFiles(self)
5750 self.context.ReaddNode(new_node)
5751 # make sure we redistribute the config
5752 self.cfg.Update(new_node, feedback_fn)
5753 # and make sure the new node will not have old files around
5754 if not new_node.master_candidate:
5755 result = self.rpc.call_node_demote_from_mc(new_node.name)
5756 msg = result.fail_msg
5758 self.LogWarning("Node failed to demote itself from master"
5759 " candidate status: %s" % msg)
5761 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5762 additional_vm=self.op.vm_capable)
5763 self.context.AddNode(new_node, self.proc.GetECId())
5766 class LUNodeSetParams(LogicalUnit):
5767 """Modifies the parameters of a node.
5769 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5770 to the node role (as _ROLE_*)
5771 @cvar _R2F: a dictionary from node role to tuples of flags
5772 @cvar _FLAGS: a list of attribute names corresponding to the flags
5775 HPATH = "node-modify"
5776 HTYPE = constants.HTYPE_NODE
5778 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5780 (True, False, False): _ROLE_CANDIDATE,
5781 (False, True, False): _ROLE_DRAINED,
5782 (False, False, True): _ROLE_OFFLINE,
5783 (False, False, False): _ROLE_REGULAR,
5785 _R2F = dict((v, k) for k, v in _F2R.items())
5786 _FLAGS = ["master_candidate", "drained", "offline"]
5788 def CheckArguments(self):
5789 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5790 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5791 self.op.master_capable, self.op.vm_capable,
5792 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5794 if all_mods.count(None) == len(all_mods):
5795 raise errors.OpPrereqError("Please pass at least one modification",
5797 if all_mods.count(True) > 1:
5798 raise errors.OpPrereqError("Can't set the node into more than one"
5799 " state at the same time",
5802 # Boolean value that tells us whether we might be demoting from MC
5803 self.might_demote = (self.op.master_candidate is False or
5804 self.op.offline is True or
5805 self.op.drained is True or
5806 self.op.master_capable is False)
5808 if self.op.secondary_ip:
5809 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5810 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5811 " address" % self.op.secondary_ip,
5814 self.lock_all = self.op.auto_promote and self.might_demote
5815 self.lock_instances = self.op.secondary_ip is not None
5817 def _InstanceFilter(self, instance):
5818 """Filter for getting affected instances.
5821 return (instance.disk_template in constants.DTS_INT_MIRROR and
5822 self.op.node_name in instance.all_nodes)
5824 def ExpandNames(self):
5826 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5828 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5830 # Since modifying a node can have severe effects on currently running
5831 # operations the resource lock is at least acquired in shared mode
5832 self.needed_locks[locking.LEVEL_NODE_RES] = \
5833 self.needed_locks[locking.LEVEL_NODE]
5835 # Get node resource and instance locks in shared mode; they are not used
5836 # for anything but read-only access
5837 self.share_locks[locking.LEVEL_NODE_RES] = 1
5838 self.share_locks[locking.LEVEL_INSTANCE] = 1
5840 if self.lock_instances:
5841 self.needed_locks[locking.LEVEL_INSTANCE] = \
5842 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5844 def BuildHooksEnv(self):
5847 This runs on the master node.
5851 "OP_TARGET": self.op.node_name,
5852 "MASTER_CANDIDATE": str(self.op.master_candidate),
5853 "OFFLINE": str(self.op.offline),
5854 "DRAINED": str(self.op.drained),
5855 "MASTER_CAPABLE": str(self.op.master_capable),
5856 "VM_CAPABLE": str(self.op.vm_capable),
5859 def BuildHooksNodes(self):
5860 """Build hooks nodes.
5863 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5866 def CheckPrereq(self):
5867 """Check prerequisites.
5869 This only checks the instance list against the existing names.
5872 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5874 if self.lock_instances:
5875 affected_instances = \
5876 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5878 # Verify instance locks
5879 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5880 wanted_instances = frozenset(affected_instances.keys())
5881 if wanted_instances - owned_instances:
5882 raise errors.OpPrereqError("Instances affected by changing node %s's"
5883 " secondary IP address have changed since"
5884 " locks were acquired, wanted '%s', have"
5885 " '%s'; retry the operation" %
5887 utils.CommaJoin(wanted_instances),
5888 utils.CommaJoin(owned_instances)),
5891 affected_instances = None
5893 if (self.op.master_candidate is not None or
5894 self.op.drained is not None or
5895 self.op.offline is not None):
5896 # we can't change the master's node flags
5897 if self.op.node_name == self.cfg.GetMasterNode():
5898 raise errors.OpPrereqError("The master role can be changed"
5899 " only via master-failover",
5902 if self.op.master_candidate and not node.master_capable:
5903 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5904 " it a master candidate" % node.name,
5907 if self.op.vm_capable is False:
5908 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5910 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5911 " the vm_capable flag" % node.name,
5914 if node.master_candidate and self.might_demote and not self.lock_all:
5915 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5916 # check if after removing the current node, we're missing master
5918 (mc_remaining, mc_should, _) = \
5919 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5920 if mc_remaining < mc_should:
5921 raise errors.OpPrereqError("Not enough master candidates, please"
5922 " pass auto promote option to allow"
5923 " promotion (--auto-promote or RAPI"
5924 " auto_promote=True)", errors.ECODE_STATE)
5926 self.old_flags = old_flags = (node.master_candidate,
5927 node.drained, node.offline)
5928 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5929 self.old_role = old_role = self._F2R[old_flags]
5931 # Check for ineffective changes
5932 for attr in self._FLAGS:
5933 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
5934 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5935 setattr(self.op, attr, None)
5937 # Past this point, any flag change to False means a transition
5938 # away from the respective state, as only real changes are kept
5940 # TODO: We might query the real power state if it supports OOB
5941 if _SupportsOob(self.cfg, node):
5942 if self.op.offline is False and not (node.powered or
5943 self.op.powered is True):
5944 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5945 " offline status can be reset") %
5946 self.op.node_name, errors.ECODE_STATE)
5947 elif self.op.powered is not None:
5948 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5949 " as it does not support out-of-band"
5950 " handling") % self.op.node_name,
5953 # If we're being deofflined/drained, we'll MC ourself if needed
5954 if (self.op.drained is False or self.op.offline is False or
5955 (self.op.master_capable and not node.master_capable)):
5956 if _DecideSelfPromotion(self):
5957 self.op.master_candidate = True
5958 self.LogInfo("Auto-promoting node to master candidate")
5960 # If we're no longer master capable, we'll demote ourselves from MC
5961 if self.op.master_capable is False and node.master_candidate:
5962 self.LogInfo("Demoting from master candidate")
5963 self.op.master_candidate = False
5966 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5967 if self.op.master_candidate:
5968 new_role = self._ROLE_CANDIDATE
5969 elif self.op.drained:
5970 new_role = self._ROLE_DRAINED
5971 elif self.op.offline:
5972 new_role = self._ROLE_OFFLINE
5973 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5974 # False is still in new flags, which means we're un-setting (the
5976 new_role = self._ROLE_REGULAR
5977 else: # no new flags, nothing, keep old role
5980 self.new_role = new_role
5982 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5983 # Trying to transition out of offline status
5984 result = self.rpc.call_version([node.name])[node.name]
5986 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5987 " to report its version: %s" %
5988 (node.name, result.fail_msg),
5991 self.LogWarning("Transitioning node from offline to online state"
5992 " without using re-add. Please make sure the node"
5995 # When changing the secondary ip, verify if this is a single-homed to
5996 # multi-homed transition or vice versa, and apply the relevant
5998 if self.op.secondary_ip:
5999 # Ok even without locking, because this can't be changed by any LU
6000 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6001 master_singlehomed = master.secondary_ip == master.primary_ip
6002 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6003 if self.op.force and node.name == master.name:
6004 self.LogWarning("Transitioning from single-homed to multi-homed"
6005 " cluster. All nodes will require a secondary ip.")
6007 raise errors.OpPrereqError("Changing the secondary ip on a"
6008 " single-homed cluster requires the"
6009 " --force option to be passed, and the"
6010 " target node to be the master",
6012 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6013 if self.op.force and node.name == master.name:
6014 self.LogWarning("Transitioning from multi-homed to single-homed"
6015 " cluster. Secondary IPs will have to be removed.")
6017 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6018 " same as the primary IP on a multi-homed"
6019 " cluster, unless the --force option is"
6020 " passed, and the target node is the"
6021 " master", errors.ECODE_INVAL)
6023 assert not (frozenset(affected_instances) -
6024 self.owned_locks(locking.LEVEL_INSTANCE))
6027 if affected_instances:
6028 msg = ("Cannot change secondary IP address: offline node has"
6029 " instances (%s) configured to use it" %
6030 utils.CommaJoin(affected_instances.keys()))
6031 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6033 # On online nodes, check that no instances are running, and that
6034 # the node has the new ip and we can reach it.
6035 for instance in affected_instances.values():
6036 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6037 msg="cannot change secondary ip")
6039 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6040 if master.name != node.name:
6041 # check reachability from master secondary ip to new secondary ip
6042 if not netutils.TcpPing(self.op.secondary_ip,
6043 constants.DEFAULT_NODED_PORT,
6044 source=master.secondary_ip):
6045 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6046 " based ping to node daemon port",
6047 errors.ECODE_ENVIRON)
6049 if self.op.ndparams:
6050 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6051 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6052 self.new_ndparams = new_ndparams
6054 if self.op.hv_state:
6055 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6056 self.node.hv_state_static)
6058 if self.op.disk_state:
6059 self.new_disk_state = \
6060 _MergeAndVerifyDiskState(self.op.disk_state,
6061 self.node.disk_state_static)
6063 def Exec(self, feedback_fn):
6068 old_role = self.old_role
6069 new_role = self.new_role
6073 if self.op.ndparams:
6074 node.ndparams = self.new_ndparams
6076 if self.op.powered is not None:
6077 node.powered = self.op.powered
6079 if self.op.hv_state:
6080 node.hv_state_static = self.new_hv_state
6082 if self.op.disk_state:
6083 node.disk_state_static = self.new_disk_state
6085 for attr in ["master_capable", "vm_capable"]:
6086 val = getattr(self.op, attr)
6088 setattr(node, attr, val)
6089 result.append((attr, str(val)))
6091 if new_role != old_role:
6092 # Tell the node to demote itself, if no longer MC and not offline
6093 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6094 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6096 self.LogWarning("Node failed to demote itself: %s", msg)
6098 new_flags = self._R2F[new_role]
6099 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6101 result.append((desc, str(nf)))
6102 (node.master_candidate, node.drained, node.offline) = new_flags
6104 # we locked all nodes, we adjust the CP before updating this node
6106 _AdjustCandidatePool(self, [node.name])
6108 if self.op.secondary_ip:
6109 node.secondary_ip = self.op.secondary_ip
6110 result.append(("secondary_ip", self.op.secondary_ip))
6112 # this will trigger configuration file update, if needed
6113 self.cfg.Update(node, feedback_fn)
6115 # this will trigger job queue propagation or cleanup if the mc
6117 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6118 self.context.ReaddNode(node)
6123 class LUNodePowercycle(NoHooksLU):
6124 """Powercycles a node.
6129 def CheckArguments(self):
6130 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6131 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6132 raise errors.OpPrereqError("The node is the master and the force"
6133 " parameter was not set",
6136 def ExpandNames(self):
6137 """Locking for PowercycleNode.
6139 This is a last-resort option and shouldn't block on other
6140 jobs. Therefore, we grab no locks.
6143 self.needed_locks = {}
6145 def Exec(self, feedback_fn):
6149 result = self.rpc.call_node_powercycle(self.op.node_name,
6150 self.cfg.GetHypervisorType())
6151 result.Raise("Failed to schedule the reboot")
6152 return result.payload
6155 class LUClusterQuery(NoHooksLU):
6156 """Query cluster configuration.
6161 def ExpandNames(self):
6162 self.needed_locks = {}
6164 def Exec(self, feedback_fn):
6165 """Return cluster config.
6168 cluster = self.cfg.GetClusterInfo()
6171 # Filter just for enabled hypervisors
6172 for os_name, hv_dict in cluster.os_hvp.items():
6173 os_hvp[os_name] = {}
6174 for hv_name, hv_params in hv_dict.items():
6175 if hv_name in cluster.enabled_hypervisors:
6176 os_hvp[os_name][hv_name] = hv_params
6178 # Convert ip_family to ip_version
6179 primary_ip_version = constants.IP4_VERSION
6180 if cluster.primary_ip_family == netutils.IP6Address.family:
6181 primary_ip_version = constants.IP6_VERSION
6184 "software_version": constants.RELEASE_VERSION,
6185 "protocol_version": constants.PROTOCOL_VERSION,
6186 "config_version": constants.CONFIG_VERSION,
6187 "os_api_version": max(constants.OS_API_VERSIONS),
6188 "export_version": constants.EXPORT_VERSION,
6189 "architecture": runtime.GetArchInfo(),
6190 "name": cluster.cluster_name,
6191 "master": cluster.master_node,
6192 "default_hypervisor": cluster.primary_hypervisor,
6193 "enabled_hypervisors": cluster.enabled_hypervisors,
6194 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6195 for hypervisor_name in cluster.enabled_hypervisors]),
6197 "beparams": cluster.beparams,
6198 "osparams": cluster.osparams,
6199 "ipolicy": cluster.ipolicy,
6200 "nicparams": cluster.nicparams,
6201 "ndparams": cluster.ndparams,
6202 "diskparams": cluster.diskparams,
6203 "candidate_pool_size": cluster.candidate_pool_size,
6204 "master_netdev": cluster.master_netdev,
6205 "master_netmask": cluster.master_netmask,
6206 "use_external_mip_script": cluster.use_external_mip_script,
6207 "volume_group_name": cluster.volume_group_name,
6208 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6209 "file_storage_dir": cluster.file_storage_dir,
6210 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6211 "maintain_node_health": cluster.maintain_node_health,
6212 "ctime": cluster.ctime,
6213 "mtime": cluster.mtime,
6214 "uuid": cluster.uuid,
6215 "tags": list(cluster.GetTags()),
6216 "uid_pool": cluster.uid_pool,
6217 "default_iallocator": cluster.default_iallocator,
6218 "reserved_lvs": cluster.reserved_lvs,
6219 "primary_ip_version": primary_ip_version,
6220 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6221 "hidden_os": cluster.hidden_os,
6222 "blacklisted_os": cluster.blacklisted_os,
6228 class LUClusterConfigQuery(NoHooksLU):
6229 """Return configuration values.
6234 def CheckArguments(self):
6235 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6237 def ExpandNames(self):
6238 self.cq.ExpandNames(self)
6240 def DeclareLocks(self, level):
6241 self.cq.DeclareLocks(self, level)
6243 def Exec(self, feedback_fn):
6244 result = self.cq.OldStyleQuery(self)
6246 assert len(result) == 1
6251 class _ClusterQuery(_QueryBase):
6252 FIELDS = query.CLUSTER_FIELDS
6254 #: Do not sort (there is only one item)
6257 def ExpandNames(self, lu):
6258 lu.needed_locks = {}
6260 # The following variables interact with _QueryBase._GetNames
6261 self.wanted = locking.ALL_SET
6262 self.do_locking = self.use_locking
6265 raise errors.OpPrereqError("Can not use locking for cluster queries",
6268 def DeclareLocks(self, lu, level):
6271 def _GetQueryData(self, lu):
6272 """Computes the list of nodes and their attributes.
6275 # Locking is not used
6276 assert not (compat.any(lu.glm.is_owned(level)
6277 for level in locking.LEVELS
6278 if level != locking.LEVEL_CLUSTER) or
6279 self.do_locking or self.use_locking)
6281 if query.CQ_CONFIG in self.requested_data:
6282 cluster = lu.cfg.GetClusterInfo()
6284 cluster = NotImplemented
6286 if query.CQ_QUEUE_DRAINED in self.requested_data:
6287 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6289 drain_flag = NotImplemented
6291 if query.CQ_WATCHER_PAUSE in self.requested_data:
6292 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6294 watcher_pause = NotImplemented
6296 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6299 class LUInstanceActivateDisks(NoHooksLU):
6300 """Bring up an instance's disks.
6305 def ExpandNames(self):
6306 self._ExpandAndLockInstance()
6307 self.needed_locks[locking.LEVEL_NODE] = []
6308 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6310 def DeclareLocks(self, level):
6311 if level == locking.LEVEL_NODE:
6312 self._LockInstancesNodes()
6314 def CheckPrereq(self):
6315 """Check prerequisites.
6317 This checks that the instance is in the cluster.
6320 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6321 assert self.instance is not None, \
6322 "Cannot retrieve locked instance %s" % self.op.instance_name
6323 _CheckNodeOnline(self, self.instance.primary_node)
6325 def Exec(self, feedback_fn):
6326 """Activate the disks.
6329 disks_ok, disks_info = \
6330 _AssembleInstanceDisks(self, self.instance,
6331 ignore_size=self.op.ignore_size)
6333 raise errors.OpExecError("Cannot activate block devices")
6335 if self.op.wait_for_sync:
6336 if not _WaitForSync(self, self.instance):
6337 raise errors.OpExecError("Some disks of the instance are degraded!")
6342 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6344 """Prepare the block devices for an instance.
6346 This sets up the block devices on all nodes.
6348 @type lu: L{LogicalUnit}
6349 @param lu: the logical unit on whose behalf we execute
6350 @type instance: L{objects.Instance}
6351 @param instance: the instance for whose disks we assemble
6352 @type disks: list of L{objects.Disk} or None
6353 @param disks: which disks to assemble (or all, if None)
6354 @type ignore_secondaries: boolean
6355 @param ignore_secondaries: if true, errors on secondary nodes
6356 won't result in an error return from the function
6357 @type ignore_size: boolean
6358 @param ignore_size: if true, the current known size of the disk
6359 will not be used during the disk activation, useful for cases
6360 when the size is wrong
6361 @return: False if the operation failed, otherwise a list of
6362 (host, instance_visible_name, node_visible_name)
6363 with the mapping from node devices to instance devices
6368 iname = instance.name
6369 disks = _ExpandCheckDisks(instance, disks)
6371 # With the two passes mechanism we try to reduce the window of
6372 # opportunity for the race condition of switching DRBD to primary
6373 # before handshaking occured, but we do not eliminate it
6375 # The proper fix would be to wait (with some limits) until the
6376 # connection has been made and drbd transitions from WFConnection
6377 # into any other network-connected state (Connected, SyncTarget,
6380 # 1st pass, assemble on all nodes in secondary mode
6381 for idx, inst_disk in enumerate(disks):
6382 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6384 node_disk = node_disk.Copy()
6385 node_disk.UnsetSize()
6386 lu.cfg.SetDiskID(node_disk, node)
6387 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6389 msg = result.fail_msg
6391 is_offline_secondary = (node in instance.secondary_nodes and
6393 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6394 " (is_primary=False, pass=1): %s",
6395 inst_disk.iv_name, node, msg)
6396 if not (ignore_secondaries or is_offline_secondary):
6399 # FIXME: race condition on drbd migration to primary
6401 # 2nd pass, do only the primary node
6402 for idx, inst_disk in enumerate(disks):
6405 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6406 if node != instance.primary_node:
6409 node_disk = node_disk.Copy()
6410 node_disk.UnsetSize()
6411 lu.cfg.SetDiskID(node_disk, node)
6412 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6414 msg = result.fail_msg
6416 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6417 " (is_primary=True, pass=2): %s",
6418 inst_disk.iv_name, node, msg)
6421 dev_path = result.payload
6423 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6425 # leave the disks configured for the primary node
6426 # this is a workaround that would be fixed better by
6427 # improving the logical/physical id handling
6429 lu.cfg.SetDiskID(disk, instance.primary_node)
6431 return disks_ok, device_info
6434 def _StartInstanceDisks(lu, instance, force):
6435 """Start the disks of an instance.
6438 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6439 ignore_secondaries=force)
6441 _ShutdownInstanceDisks(lu, instance)
6442 if force is not None and not force:
6443 lu.proc.LogWarning("", hint="If the message above refers to a"
6445 " you can retry the operation using '--force'.")
6446 raise errors.OpExecError("Disk consistency error")
6449 class LUInstanceDeactivateDisks(NoHooksLU):
6450 """Shutdown an instance's disks.
6455 def ExpandNames(self):
6456 self._ExpandAndLockInstance()
6457 self.needed_locks[locking.LEVEL_NODE] = []
6458 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6460 def DeclareLocks(self, level):
6461 if level == locking.LEVEL_NODE:
6462 self._LockInstancesNodes()
6464 def CheckPrereq(self):
6465 """Check prerequisites.
6467 This checks that the instance is in the cluster.
6470 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6471 assert self.instance is not None, \
6472 "Cannot retrieve locked instance %s" % self.op.instance_name
6474 def Exec(self, feedback_fn):
6475 """Deactivate the disks
6478 instance = self.instance
6480 _ShutdownInstanceDisks(self, instance)
6482 _SafeShutdownInstanceDisks(self, instance)
6485 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6486 """Shutdown block devices of an instance.
6488 This function checks if an instance is running, before calling
6489 _ShutdownInstanceDisks.
6492 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6493 _ShutdownInstanceDisks(lu, instance, disks=disks)
6496 def _ExpandCheckDisks(instance, disks):
6497 """Return the instance disks selected by the disks list
6499 @type disks: list of L{objects.Disk} or None
6500 @param disks: selected disks
6501 @rtype: list of L{objects.Disk}
6502 @return: selected instance disks to act on
6506 return instance.disks
6508 if not set(disks).issubset(instance.disks):
6509 raise errors.ProgrammerError("Can only act on disks belonging to the"
6514 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6515 """Shutdown block devices of an instance.
6517 This does the shutdown on all nodes of the instance.
6519 If the ignore_primary is false, errors on the primary node are
6524 disks = _ExpandCheckDisks(instance, disks)
6527 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6528 lu.cfg.SetDiskID(top_disk, node)
6529 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6530 msg = result.fail_msg
6532 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6533 disk.iv_name, node, msg)
6534 if ((node == instance.primary_node and not ignore_primary) or
6535 (node != instance.primary_node and not result.offline)):
6540 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6541 """Checks if a node has enough free memory.
6543 This function check if a given node has the needed amount of free
6544 memory. In case the node has less memory or we cannot get the
6545 information from the node, this function raise an OpPrereqError
6548 @type lu: C{LogicalUnit}
6549 @param lu: a logical unit from which we get configuration data
6551 @param node: the node to check
6552 @type reason: C{str}
6553 @param reason: string to use in the error message
6554 @type requested: C{int}
6555 @param requested: the amount of memory in MiB to check for
6556 @type hypervisor_name: C{str}
6557 @param hypervisor_name: the hypervisor to ask for memory stats
6559 @return: node current free memory
6560 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6561 we cannot check the node
6564 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6565 nodeinfo[node].Raise("Can't get data from node %s" % node,
6566 prereq=True, ecode=errors.ECODE_ENVIRON)
6567 (_, _, (hv_info, )) = nodeinfo[node].payload
6569 free_mem = hv_info.get("memory_free", None)
6570 if not isinstance(free_mem, int):
6571 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6572 " was '%s'" % (node, free_mem),
6573 errors.ECODE_ENVIRON)
6574 if requested > free_mem:
6575 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6576 " needed %s MiB, available %s MiB" %
6577 (node, reason, requested, free_mem),
6582 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6583 """Checks if nodes have enough free disk space in the all VGs.
6585 This function check if all given nodes have the needed amount of
6586 free disk. In case any node has less disk or we cannot get the
6587 information from the node, this function raise an OpPrereqError
6590 @type lu: C{LogicalUnit}
6591 @param lu: a logical unit from which we get configuration data
6592 @type nodenames: C{list}
6593 @param nodenames: the list of node names to check
6594 @type req_sizes: C{dict}
6595 @param req_sizes: the hash of vg and corresponding amount of disk in
6597 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6598 or we cannot check the node
6601 for vg, req_size in req_sizes.items():
6602 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6605 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6606 """Checks if nodes have enough free disk space in the specified VG.
6608 This function check if all given nodes have the needed amount of
6609 free disk. In case any node has less disk or we cannot get the
6610 information from the node, this function raise an OpPrereqError
6613 @type lu: C{LogicalUnit}
6614 @param lu: a logical unit from which we get configuration data
6615 @type nodenames: C{list}
6616 @param nodenames: the list of node names to check
6618 @param vg: the volume group to check
6619 @type requested: C{int}
6620 @param requested: the amount of disk in MiB to check for
6621 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6622 or we cannot check the node
6625 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6626 for node in nodenames:
6627 info = nodeinfo[node]
6628 info.Raise("Cannot get current information from node %s" % node,
6629 prereq=True, ecode=errors.ECODE_ENVIRON)
6630 (_, (vg_info, ), _) = info.payload
6631 vg_free = vg_info.get("vg_free", None)
6632 if not isinstance(vg_free, int):
6633 raise errors.OpPrereqError("Can't compute free disk space on node"
6634 " %s for vg %s, result was '%s'" %
6635 (node, vg, vg_free), errors.ECODE_ENVIRON)
6636 if requested > vg_free:
6637 raise errors.OpPrereqError("Not enough disk space on target node %s"
6638 " vg %s: required %d MiB, available %d MiB" %
6639 (node, vg, requested, vg_free),
6643 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6644 """Checks if nodes have enough physical CPUs
6646 This function checks if all given nodes have the needed number of
6647 physical CPUs. In case any node has less CPUs or we cannot get the
6648 information from the node, this function raises an OpPrereqError
6651 @type lu: C{LogicalUnit}
6652 @param lu: a logical unit from which we get configuration data
6653 @type nodenames: C{list}
6654 @param nodenames: the list of node names to check
6655 @type requested: C{int}
6656 @param requested: the minimum acceptable number of physical CPUs
6657 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6658 or we cannot check the node
6661 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6662 for node in nodenames:
6663 info = nodeinfo[node]
6664 info.Raise("Cannot get current information from node %s" % node,
6665 prereq=True, ecode=errors.ECODE_ENVIRON)
6666 (_, _, (hv_info, )) = info.payload
6667 num_cpus = hv_info.get("cpu_total", None)
6668 if not isinstance(num_cpus, int):
6669 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6670 " on node %s, result was '%s'" %
6671 (node, num_cpus), errors.ECODE_ENVIRON)
6672 if requested > num_cpus:
6673 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6674 "required" % (node, num_cpus, requested),
6678 class LUInstanceStartup(LogicalUnit):
6679 """Starts an instance.
6682 HPATH = "instance-start"
6683 HTYPE = constants.HTYPE_INSTANCE
6686 def CheckArguments(self):
6688 if self.op.beparams:
6689 # fill the beparams dict
6690 objects.UpgradeBeParams(self.op.beparams)
6691 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6693 def ExpandNames(self):
6694 self._ExpandAndLockInstance()
6695 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6697 def DeclareLocks(self, level):
6698 if level == locking.LEVEL_NODE_RES:
6699 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6701 def BuildHooksEnv(self):
6704 This runs on master, primary and secondary nodes of the instance.
6708 "FORCE": self.op.force,
6711 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6715 def BuildHooksNodes(self):
6716 """Build hooks nodes.
6719 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6722 def CheckPrereq(self):
6723 """Check prerequisites.
6725 This checks that the instance is in the cluster.
6728 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6729 assert self.instance is not None, \
6730 "Cannot retrieve locked instance %s" % self.op.instance_name
6733 if self.op.hvparams:
6734 # check hypervisor parameter syntax (locally)
6735 cluster = self.cfg.GetClusterInfo()
6736 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6737 filled_hvp = cluster.FillHV(instance)
6738 filled_hvp.update(self.op.hvparams)
6739 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6740 hv_type.CheckParameterSyntax(filled_hvp)
6741 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6743 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6745 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6747 if self.primary_offline and self.op.ignore_offline_nodes:
6748 self.proc.LogWarning("Ignoring offline primary node")
6750 if self.op.hvparams or self.op.beparams:
6751 self.proc.LogWarning("Overridden parameters are ignored")
6753 _CheckNodeOnline(self, instance.primary_node)
6755 bep = self.cfg.GetClusterInfo().FillBE(instance)
6756 bep.update(self.op.beparams)
6758 # check bridges existence
6759 _CheckInstanceBridgesExist(self, instance)
6761 remote_info = self.rpc.call_instance_info(instance.primary_node,
6763 instance.hypervisor)
6764 remote_info.Raise("Error checking node %s" % instance.primary_node,
6765 prereq=True, ecode=errors.ECODE_ENVIRON)
6766 if not remote_info.payload: # not running already
6767 _CheckNodeFreeMemory(self, instance.primary_node,
6768 "starting instance %s" % instance.name,
6769 bep[constants.BE_MINMEM], instance.hypervisor)
6771 def Exec(self, feedback_fn):
6772 """Start the instance.
6775 instance = self.instance
6776 force = self.op.force
6778 if not self.op.no_remember:
6779 self.cfg.MarkInstanceUp(instance.name)
6781 if self.primary_offline:
6782 assert self.op.ignore_offline_nodes
6783 self.proc.LogInfo("Primary node offline, marked instance as started")
6785 node_current = instance.primary_node
6787 _StartInstanceDisks(self, instance, force)
6790 self.rpc.call_instance_start(node_current,
6791 (instance, self.op.hvparams,
6793 self.op.startup_paused)
6794 msg = result.fail_msg
6796 _ShutdownInstanceDisks(self, instance)
6797 raise errors.OpExecError("Could not start instance: %s" % msg)
6800 class LUInstanceReboot(LogicalUnit):
6801 """Reboot an instance.
6804 HPATH = "instance-reboot"
6805 HTYPE = constants.HTYPE_INSTANCE
6808 def ExpandNames(self):
6809 self._ExpandAndLockInstance()
6811 def BuildHooksEnv(self):
6814 This runs on master, primary and secondary nodes of the instance.
6818 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6819 "REBOOT_TYPE": self.op.reboot_type,
6820 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6823 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6827 def BuildHooksNodes(self):
6828 """Build hooks nodes.
6831 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6834 def CheckPrereq(self):
6835 """Check prerequisites.
6837 This checks that the instance is in the cluster.
6840 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6841 assert self.instance is not None, \
6842 "Cannot retrieve locked instance %s" % self.op.instance_name
6843 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6844 _CheckNodeOnline(self, instance.primary_node)
6846 # check bridges existence
6847 _CheckInstanceBridgesExist(self, instance)
6849 def Exec(self, feedback_fn):
6850 """Reboot the instance.
6853 instance = self.instance
6854 ignore_secondaries = self.op.ignore_secondaries
6855 reboot_type = self.op.reboot_type
6857 remote_info = self.rpc.call_instance_info(instance.primary_node,
6859 instance.hypervisor)
6860 remote_info.Raise("Error checking node %s" % instance.primary_node)
6861 instance_running = bool(remote_info.payload)
6863 node_current = instance.primary_node
6865 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6866 constants.INSTANCE_REBOOT_HARD]:
6867 for disk in instance.disks:
6868 self.cfg.SetDiskID(disk, node_current)
6869 result = self.rpc.call_instance_reboot(node_current, instance,
6871 self.op.shutdown_timeout)
6872 result.Raise("Could not reboot instance")
6874 if instance_running:
6875 result = self.rpc.call_instance_shutdown(node_current, instance,
6876 self.op.shutdown_timeout)
6877 result.Raise("Could not shutdown instance for full reboot")
6878 _ShutdownInstanceDisks(self, instance)
6880 self.LogInfo("Instance %s was already stopped, starting now",
6882 _StartInstanceDisks(self, instance, ignore_secondaries)
6883 result = self.rpc.call_instance_start(node_current,
6884 (instance, None, None), False)
6885 msg = result.fail_msg
6887 _ShutdownInstanceDisks(self, instance)
6888 raise errors.OpExecError("Could not start instance for"
6889 " full reboot: %s" % msg)
6891 self.cfg.MarkInstanceUp(instance.name)
6894 class LUInstanceShutdown(LogicalUnit):
6895 """Shutdown an instance.
6898 HPATH = "instance-stop"
6899 HTYPE = constants.HTYPE_INSTANCE
6902 def ExpandNames(self):
6903 self._ExpandAndLockInstance()
6905 def BuildHooksEnv(self):
6908 This runs on master, primary and secondary nodes of the instance.
6911 env = _BuildInstanceHookEnvByObject(self, self.instance)
6912 env["TIMEOUT"] = self.op.timeout
6915 def BuildHooksNodes(self):
6916 """Build hooks nodes.
6919 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6922 def CheckPrereq(self):
6923 """Check prerequisites.
6925 This checks that the instance is in the cluster.
6928 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6929 assert self.instance is not None, \
6930 "Cannot retrieve locked instance %s" % self.op.instance_name
6932 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6934 self.primary_offline = \
6935 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6937 if self.primary_offline and self.op.ignore_offline_nodes:
6938 self.proc.LogWarning("Ignoring offline primary node")
6940 _CheckNodeOnline(self, self.instance.primary_node)
6942 def Exec(self, feedback_fn):
6943 """Shutdown the instance.
6946 instance = self.instance
6947 node_current = instance.primary_node
6948 timeout = self.op.timeout
6950 if not self.op.no_remember:
6951 self.cfg.MarkInstanceDown(instance.name)
6953 if self.primary_offline:
6954 assert self.op.ignore_offline_nodes
6955 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6957 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6958 msg = result.fail_msg
6960 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6962 _ShutdownInstanceDisks(self, instance)
6965 class LUInstanceReinstall(LogicalUnit):
6966 """Reinstall an instance.
6969 HPATH = "instance-reinstall"
6970 HTYPE = constants.HTYPE_INSTANCE
6973 def ExpandNames(self):
6974 self._ExpandAndLockInstance()
6976 def BuildHooksEnv(self):
6979 This runs on master, primary and secondary nodes of the instance.
6982 return _BuildInstanceHookEnvByObject(self, self.instance)
6984 def BuildHooksNodes(self):
6985 """Build hooks nodes.
6988 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6991 def CheckPrereq(self):
6992 """Check prerequisites.
6994 This checks that the instance is in the cluster and is not running.
6997 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6998 assert instance is not None, \
6999 "Cannot retrieve locked instance %s" % self.op.instance_name
7000 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7001 " offline, cannot reinstall")
7003 if instance.disk_template == constants.DT_DISKLESS:
7004 raise errors.OpPrereqError("Instance '%s' has no disks" %
7005 self.op.instance_name,
7007 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7009 if self.op.os_type is not None:
7011 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7012 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7013 instance_os = self.op.os_type
7015 instance_os = instance.os
7017 nodelist = list(instance.all_nodes)
7019 if self.op.osparams:
7020 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7021 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7022 self.os_inst = i_osdict # the new dict (without defaults)
7026 self.instance = instance
7028 def Exec(self, feedback_fn):
7029 """Reinstall the instance.
7032 inst = self.instance
7034 if self.op.os_type is not None:
7035 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7036 inst.os = self.op.os_type
7037 # Write to configuration
7038 self.cfg.Update(inst, feedback_fn)
7040 _StartInstanceDisks(self, inst, None)
7042 feedback_fn("Running the instance OS create scripts...")
7043 # FIXME: pass debug option from opcode to backend
7044 result = self.rpc.call_instance_os_add(inst.primary_node,
7045 (inst, self.os_inst), True,
7046 self.op.debug_level)
7047 result.Raise("Could not install OS for instance %s on node %s" %
7048 (inst.name, inst.primary_node))
7050 _ShutdownInstanceDisks(self, inst)
7053 class LUInstanceRecreateDisks(LogicalUnit):
7054 """Recreate an instance's missing disks.
7057 HPATH = "instance-recreate-disks"
7058 HTYPE = constants.HTYPE_INSTANCE
7061 _MODIFYABLE = frozenset([
7062 constants.IDISK_SIZE,
7063 constants.IDISK_MODE,
7066 # New or changed disk parameters may have different semantics
7067 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7068 constants.IDISK_ADOPT,
7070 # TODO: Implement support changing VG while recreating
7072 constants.IDISK_METAVG,
7075 def _RunAllocator(self):
7076 """Run the allocator based on input opcode.
7079 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7082 # The allocator should actually run in "relocate" mode, but current
7083 # allocators don't support relocating all the nodes of an instance at
7084 # the same time. As a workaround we use "allocate" mode, but this is
7085 # suboptimal for two reasons:
7086 # - The instance name passed to the allocator is present in the list of
7087 # existing instances, so there could be a conflict within the
7088 # internal structures of the allocator. This doesn't happen with the
7089 # current allocators, but it's a liability.
7090 # - The allocator counts the resources used by the instance twice: once
7091 # because the instance exists already, and once because it tries to
7092 # allocate a new instance.
7093 # The allocator could choose some of the nodes on which the instance is
7094 # running, but that's not a problem. If the instance nodes are broken,
7095 # they should be already be marked as drained or offline, and hence
7096 # skipped by the allocator. If instance disks have been lost for other
7097 # reasons, then recreating the disks on the same nodes should be fine.
7098 disk_template = self.instance.disk_template
7099 spindle_use = be_full[constants.BE_SPINDLE_USE]
7100 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7101 disk_template=disk_template,
7102 tags=list(self.instance.GetTags()),
7103 os=self.instance.os,
7105 vcpus=be_full[constants.BE_VCPUS],
7106 memory=be_full[constants.BE_MAXMEM],
7107 spindle_use=spindle_use,
7108 disks=[{constants.IDISK_SIZE: d.size,
7109 constants.IDISK_MODE: d.mode}
7110 for d in self.instance.disks],
7111 hypervisor=self.instance.hypervisor)
7112 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7114 ial.Run(self.op.iallocator)
7116 assert req.RequiredNodes() == len(self.instance.all_nodes)
7119 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7120 " %s" % (self.op.iallocator, ial.info),
7123 self.op.nodes = ial.result
7124 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7125 self.op.instance_name, self.op.iallocator,
7126 utils.CommaJoin(ial.result))
7128 def CheckArguments(self):
7129 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7130 # Normalize and convert deprecated list of disk indices
7131 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7133 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7135 raise errors.OpPrereqError("Some disks have been specified more than"
7136 " once: %s" % utils.CommaJoin(duplicates),
7139 if self.op.iallocator and self.op.nodes:
7140 raise errors.OpPrereqError("Give either the iallocator or the new"
7141 " nodes, not both", errors.ECODE_INVAL)
7143 for (idx, params) in self.op.disks:
7144 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7145 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7147 raise errors.OpPrereqError("Parameters for disk %s try to change"
7148 " unmodifyable parameter(s): %s" %
7149 (idx, utils.CommaJoin(unsupported)),
7152 def ExpandNames(self):
7153 self._ExpandAndLockInstance()
7154 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7156 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7157 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7159 self.needed_locks[locking.LEVEL_NODE] = []
7160 if self.op.iallocator:
7161 # iallocator will select a new node in the same group
7162 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7163 self.needed_locks[locking.LEVEL_NODE_RES] = []
7165 def DeclareLocks(self, level):
7166 if level == locking.LEVEL_NODEGROUP:
7167 assert self.op.iallocator is not None
7168 assert not self.op.nodes
7169 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7170 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7171 # Lock the primary group used by the instance optimistically; this
7172 # requires going via the node before it's locked, requiring
7173 # verification later on
7174 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7175 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7177 elif level == locking.LEVEL_NODE:
7178 # If an allocator is used, then we lock all the nodes in the current
7179 # instance group, as we don't know yet which ones will be selected;
7180 # if we replace the nodes without using an allocator, locks are
7181 # already declared in ExpandNames; otherwise, we need to lock all the
7182 # instance nodes for disk re-creation
7183 if self.op.iallocator:
7184 assert not self.op.nodes
7185 assert not self.needed_locks[locking.LEVEL_NODE]
7186 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7188 # Lock member nodes of the group of the primary node
7189 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7190 self.needed_locks[locking.LEVEL_NODE].extend(
7191 self.cfg.GetNodeGroup(group_uuid).members)
7192 elif not self.op.nodes:
7193 self._LockInstancesNodes(primary_only=False)
7194 elif level == locking.LEVEL_NODE_RES:
7196 self.needed_locks[locking.LEVEL_NODE_RES] = \
7197 self.needed_locks[locking.LEVEL_NODE][:]
7199 def BuildHooksEnv(self):
7202 This runs on master, primary and secondary nodes of the instance.
7205 return _BuildInstanceHookEnvByObject(self, self.instance)
7207 def BuildHooksNodes(self):
7208 """Build hooks nodes.
7211 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7214 def CheckPrereq(self):
7215 """Check prerequisites.
7217 This checks that the instance is in the cluster and is not running.
7220 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7221 assert instance is not None, \
7222 "Cannot retrieve locked instance %s" % self.op.instance_name
7224 if len(self.op.nodes) != len(instance.all_nodes):
7225 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7226 " %d replacement nodes were specified" %
7227 (instance.name, len(instance.all_nodes),
7228 len(self.op.nodes)),
7230 assert instance.disk_template != constants.DT_DRBD8 or \
7231 len(self.op.nodes) == 2
7232 assert instance.disk_template != constants.DT_PLAIN or \
7233 len(self.op.nodes) == 1
7234 primary_node = self.op.nodes[0]
7236 primary_node = instance.primary_node
7237 if not self.op.iallocator:
7238 _CheckNodeOnline(self, primary_node)
7240 if instance.disk_template == constants.DT_DISKLESS:
7241 raise errors.OpPrereqError("Instance '%s' has no disks" %
7242 self.op.instance_name, errors.ECODE_INVAL)
7244 # Verify if node group locks are still correct
7245 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7247 # Node group locks are acquired only for the primary node (and only
7248 # when the allocator is used)
7249 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7252 # if we replace nodes *and* the old primary is offline, we don't
7253 # check the instance state
7254 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7255 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7256 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7257 msg="cannot recreate disks")
7260 self.disks = dict(self.op.disks)
7262 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7264 maxidx = max(self.disks.keys())
7265 if maxidx >= len(instance.disks):
7266 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7269 if ((self.op.nodes or self.op.iallocator) and
7270 sorted(self.disks.keys()) != range(len(instance.disks))):
7271 raise errors.OpPrereqError("Can't recreate disks partially and"
7272 " change the nodes at the same time",
7275 self.instance = instance
7277 if self.op.iallocator:
7278 self._RunAllocator()
7280 # Release unneeded node and node resource locks
7281 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7282 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7284 def Exec(self, feedback_fn):
7285 """Recreate the disks.
7288 instance = self.instance
7290 assert (self.owned_locks(locking.LEVEL_NODE) ==
7291 self.owned_locks(locking.LEVEL_NODE_RES))
7294 mods = [] # keeps track of needed changes
7296 for idx, disk in enumerate(instance.disks):
7298 changes = self.disks[idx]
7300 # Disk should not be recreated
7304 # update secondaries for disks, if needed
7305 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7306 # need to update the nodes and minors
7307 assert len(self.op.nodes) == 2
7308 assert len(disk.logical_id) == 6 # otherwise disk internals
7310 (_, _, old_port, _, _, old_secret) = disk.logical_id
7311 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7312 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7313 new_minors[0], new_minors[1], old_secret)
7314 assert len(disk.logical_id) == len(new_id)
7318 mods.append((idx, new_id, changes))
7320 # now that we have passed all asserts above, we can apply the mods
7321 # in a single run (to avoid partial changes)
7322 for idx, new_id, changes in mods:
7323 disk = instance.disks[idx]
7324 if new_id is not None:
7325 assert disk.dev_type == constants.LD_DRBD8
7326 disk.logical_id = new_id
7328 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7329 mode=changes.get(constants.IDISK_MODE, None))
7331 # change primary node, if needed
7333 instance.primary_node = self.op.nodes[0]
7334 self.LogWarning("Changing the instance's nodes, you will have to"
7335 " remove any disks left on the older nodes manually")
7338 self.cfg.Update(instance, feedback_fn)
7340 _CreateDisks(self, instance, to_skip=to_skip)
7343 class LUInstanceRename(LogicalUnit):
7344 """Rename an instance.
7347 HPATH = "instance-rename"
7348 HTYPE = constants.HTYPE_INSTANCE
7350 def CheckArguments(self):
7354 if self.op.ip_check and not self.op.name_check:
7355 # TODO: make the ip check more flexible and not depend on the name check
7356 raise errors.OpPrereqError("IP address check requires a name check",
7359 def BuildHooksEnv(self):
7362 This runs on master, primary and secondary nodes of the instance.
7365 env = _BuildInstanceHookEnvByObject(self, self.instance)
7366 env["INSTANCE_NEW_NAME"] = self.op.new_name
7369 def BuildHooksNodes(self):
7370 """Build hooks nodes.
7373 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7376 def CheckPrereq(self):
7377 """Check prerequisites.
7379 This checks that the instance is in the cluster and is not running.
7382 self.op.instance_name = _ExpandInstanceName(self.cfg,
7383 self.op.instance_name)
7384 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7385 assert instance is not None
7386 _CheckNodeOnline(self, instance.primary_node)
7387 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7388 msg="cannot rename")
7389 self.instance = instance
7391 new_name = self.op.new_name
7392 if self.op.name_check:
7393 hostname = netutils.GetHostname(name=new_name)
7394 if hostname.name != new_name:
7395 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7397 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7398 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7399 " same as given hostname '%s'") %
7400 (hostname.name, self.op.new_name),
7402 new_name = self.op.new_name = hostname.name
7403 if (self.op.ip_check and
7404 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7405 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7406 (hostname.ip, new_name),
7407 errors.ECODE_NOTUNIQUE)
7409 instance_list = self.cfg.GetInstanceList()
7410 if new_name in instance_list and new_name != instance.name:
7411 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7412 new_name, errors.ECODE_EXISTS)
7414 def Exec(self, feedback_fn):
7415 """Rename the instance.
7418 inst = self.instance
7419 old_name = inst.name
7421 rename_file_storage = False
7422 if (inst.disk_template in constants.DTS_FILEBASED and
7423 self.op.new_name != inst.name):
7424 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7425 rename_file_storage = True
7427 self.cfg.RenameInstance(inst.name, self.op.new_name)
7428 # Change the instance lock. This is definitely safe while we hold the BGL.
7429 # Otherwise the new lock would have to be added in acquired mode.
7431 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7432 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7434 # re-read the instance from the configuration after rename
7435 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7437 if rename_file_storage:
7438 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7439 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7440 old_file_storage_dir,
7441 new_file_storage_dir)
7442 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7443 " (but the instance has been renamed in Ganeti)" %
7444 (inst.primary_node, old_file_storage_dir,
7445 new_file_storage_dir))
7447 _StartInstanceDisks(self, inst, None)
7449 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7450 old_name, self.op.debug_level)
7451 msg = result.fail_msg
7453 msg = ("Could not run OS rename script for instance %s on node %s"
7454 " (but the instance has been renamed in Ganeti): %s" %
7455 (inst.name, inst.primary_node, msg))
7456 self.proc.LogWarning(msg)
7458 _ShutdownInstanceDisks(self, inst)
7463 class LUInstanceRemove(LogicalUnit):
7464 """Remove an instance.
7467 HPATH = "instance-remove"
7468 HTYPE = constants.HTYPE_INSTANCE
7471 def ExpandNames(self):
7472 self._ExpandAndLockInstance()
7473 self.needed_locks[locking.LEVEL_NODE] = []
7474 self.needed_locks[locking.LEVEL_NODE_RES] = []
7475 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7477 def DeclareLocks(self, level):
7478 if level == locking.LEVEL_NODE:
7479 self._LockInstancesNodes()
7480 elif level == locking.LEVEL_NODE_RES:
7482 self.needed_locks[locking.LEVEL_NODE_RES] = \
7483 self.needed_locks[locking.LEVEL_NODE][:]
7485 def BuildHooksEnv(self):
7488 This runs on master, primary and secondary nodes of the instance.
7491 env = _BuildInstanceHookEnvByObject(self, self.instance)
7492 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7495 def BuildHooksNodes(self):
7496 """Build hooks nodes.
7499 nl = [self.cfg.GetMasterNode()]
7500 nl_post = list(self.instance.all_nodes) + nl
7501 return (nl, nl_post)
7503 def CheckPrereq(self):
7504 """Check prerequisites.
7506 This checks that the instance is in the cluster.
7509 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7510 assert self.instance is not None, \
7511 "Cannot retrieve locked instance %s" % self.op.instance_name
7513 def Exec(self, feedback_fn):
7514 """Remove the instance.
7517 instance = self.instance
7518 logging.info("Shutting down instance %s on node %s",
7519 instance.name, instance.primary_node)
7521 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7522 self.op.shutdown_timeout)
7523 msg = result.fail_msg
7525 if self.op.ignore_failures:
7526 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7528 raise errors.OpExecError("Could not shutdown instance %s on"
7530 (instance.name, instance.primary_node, msg))
7532 assert (self.owned_locks(locking.LEVEL_NODE) ==
7533 self.owned_locks(locking.LEVEL_NODE_RES))
7534 assert not (set(instance.all_nodes) -
7535 self.owned_locks(locking.LEVEL_NODE)), \
7536 "Not owning correct locks"
7538 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7541 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7542 """Utility function to remove an instance.
7545 logging.info("Removing block devices for instance %s", instance.name)
7547 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7548 if not ignore_failures:
7549 raise errors.OpExecError("Can't remove instance's disks")
7550 feedback_fn("Warning: can't remove instance's disks")
7552 logging.info("Removing instance %s out of cluster config", instance.name)
7554 lu.cfg.RemoveInstance(instance.name)
7556 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7557 "Instance lock removal conflict"
7559 # Remove lock for the instance
7560 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7563 class LUInstanceQuery(NoHooksLU):
7564 """Logical unit for querying instances.
7567 # pylint: disable=W0142
7570 def CheckArguments(self):
7571 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7572 self.op.output_fields, self.op.use_locking)
7574 def ExpandNames(self):
7575 self.iq.ExpandNames(self)
7577 def DeclareLocks(self, level):
7578 self.iq.DeclareLocks(self, level)
7580 def Exec(self, feedback_fn):
7581 return self.iq.OldStyleQuery(self)
7584 class LUInstanceFailover(LogicalUnit):
7585 """Failover an instance.
7588 HPATH = "instance-failover"
7589 HTYPE = constants.HTYPE_INSTANCE
7592 def CheckArguments(self):
7593 """Check the arguments.
7596 self.iallocator = getattr(self.op, "iallocator", None)
7597 self.target_node = getattr(self.op, "target_node", None)
7599 def ExpandNames(self):
7600 self._ExpandAndLockInstance()
7602 if self.op.target_node is not None:
7603 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7605 self.needed_locks[locking.LEVEL_NODE] = []
7606 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7608 self.needed_locks[locking.LEVEL_NODE_RES] = []
7609 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7611 ignore_consistency = self.op.ignore_consistency
7612 shutdown_timeout = self.op.shutdown_timeout
7613 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7616 ignore_consistency=ignore_consistency,
7617 shutdown_timeout=shutdown_timeout,
7618 ignore_ipolicy=self.op.ignore_ipolicy)
7619 self.tasklets = [self._migrater]
7621 def DeclareLocks(self, level):
7622 if level == locking.LEVEL_NODE:
7623 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7624 if instance.disk_template in constants.DTS_EXT_MIRROR:
7625 if self.op.target_node is None:
7626 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7628 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7629 self.op.target_node]
7630 del self.recalculate_locks[locking.LEVEL_NODE]
7632 self._LockInstancesNodes()
7633 elif level == locking.LEVEL_NODE_RES:
7635 self.needed_locks[locking.LEVEL_NODE_RES] = \
7636 self.needed_locks[locking.LEVEL_NODE][:]
7638 def BuildHooksEnv(self):
7641 This runs on master, primary and secondary nodes of the instance.
7644 instance = self._migrater.instance
7645 source_node = instance.primary_node
7646 target_node = self.op.target_node
7648 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7649 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7650 "OLD_PRIMARY": source_node,
7651 "NEW_PRIMARY": target_node,
7654 if instance.disk_template in constants.DTS_INT_MIRROR:
7655 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7656 env["NEW_SECONDARY"] = source_node
7658 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7660 env.update(_BuildInstanceHookEnvByObject(self, instance))
7664 def BuildHooksNodes(self):
7665 """Build hooks nodes.
7668 instance = self._migrater.instance
7669 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7670 return (nl, nl + [instance.primary_node])
7673 class LUInstanceMigrate(LogicalUnit):
7674 """Migrate an instance.
7676 This is migration without shutting down, compared to the failover,
7677 which is done with shutdown.
7680 HPATH = "instance-migrate"
7681 HTYPE = constants.HTYPE_INSTANCE
7684 def ExpandNames(self):
7685 self._ExpandAndLockInstance()
7687 if self.op.target_node is not None:
7688 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7690 self.needed_locks[locking.LEVEL_NODE] = []
7691 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7693 self.needed_locks[locking.LEVEL_NODE] = []
7694 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7697 TLMigrateInstance(self, self.op.instance_name,
7698 cleanup=self.op.cleanup,
7700 fallback=self.op.allow_failover,
7701 allow_runtime_changes=self.op.allow_runtime_changes,
7702 ignore_ipolicy=self.op.ignore_ipolicy)
7703 self.tasklets = [self._migrater]
7705 def DeclareLocks(self, level):
7706 if level == locking.LEVEL_NODE:
7707 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7708 if instance.disk_template in constants.DTS_EXT_MIRROR:
7709 if self.op.target_node is None:
7710 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7712 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7713 self.op.target_node]
7714 del self.recalculate_locks[locking.LEVEL_NODE]
7716 self._LockInstancesNodes()
7717 elif level == locking.LEVEL_NODE_RES:
7719 self.needed_locks[locking.LEVEL_NODE_RES] = \
7720 self.needed_locks[locking.LEVEL_NODE][:]
7722 def BuildHooksEnv(self):
7725 This runs on master, primary and secondary nodes of the instance.
7728 instance = self._migrater.instance
7729 source_node = instance.primary_node
7730 target_node = self.op.target_node
7731 env = _BuildInstanceHookEnvByObject(self, instance)
7733 "MIGRATE_LIVE": self._migrater.live,
7734 "MIGRATE_CLEANUP": self.op.cleanup,
7735 "OLD_PRIMARY": source_node,
7736 "NEW_PRIMARY": target_node,
7737 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7740 if instance.disk_template in constants.DTS_INT_MIRROR:
7741 env["OLD_SECONDARY"] = target_node
7742 env["NEW_SECONDARY"] = source_node
7744 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7748 def BuildHooksNodes(self):
7749 """Build hooks nodes.
7752 instance = self._migrater.instance
7753 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7754 return (nl, nl + [instance.primary_node])
7757 class LUInstanceMove(LogicalUnit):
7758 """Move an instance by data-copying.
7761 HPATH = "instance-move"
7762 HTYPE = constants.HTYPE_INSTANCE
7765 def ExpandNames(self):
7766 self._ExpandAndLockInstance()
7767 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7768 self.op.target_node = target_node
7769 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7770 self.needed_locks[locking.LEVEL_NODE_RES] = []
7771 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7773 def DeclareLocks(self, level):
7774 if level == locking.LEVEL_NODE:
7775 self._LockInstancesNodes(primary_only=True)
7776 elif level == locking.LEVEL_NODE_RES:
7778 self.needed_locks[locking.LEVEL_NODE_RES] = \
7779 self.needed_locks[locking.LEVEL_NODE][:]
7781 def BuildHooksEnv(self):
7784 This runs on master, primary and secondary nodes of the instance.
7788 "TARGET_NODE": self.op.target_node,
7789 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7791 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7794 def BuildHooksNodes(self):
7795 """Build hooks nodes.
7799 self.cfg.GetMasterNode(),
7800 self.instance.primary_node,
7801 self.op.target_node,
7805 def CheckPrereq(self):
7806 """Check prerequisites.
7808 This checks that the instance is in the cluster.
7811 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7812 assert self.instance is not None, \
7813 "Cannot retrieve locked instance %s" % self.op.instance_name
7815 node = self.cfg.GetNodeInfo(self.op.target_node)
7816 assert node is not None, \
7817 "Cannot retrieve locked node %s" % self.op.target_node
7819 self.target_node = target_node = node.name
7821 if target_node == instance.primary_node:
7822 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7823 (instance.name, target_node),
7826 bep = self.cfg.GetClusterInfo().FillBE(instance)
7828 for idx, dsk in enumerate(instance.disks):
7829 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7830 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7831 " cannot copy" % idx, errors.ECODE_STATE)
7833 _CheckNodeOnline(self, target_node)
7834 _CheckNodeNotDrained(self, target_node)
7835 _CheckNodeVmCapable(self, target_node)
7836 cluster = self.cfg.GetClusterInfo()
7837 group_info = self.cfg.GetNodeGroup(node.group)
7838 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7839 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7840 ignore=self.op.ignore_ipolicy)
7842 if instance.admin_state == constants.ADMINST_UP:
7843 # check memory requirements on the secondary node
7844 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7845 instance.name, bep[constants.BE_MAXMEM],
7846 instance.hypervisor)
7848 self.LogInfo("Not checking memory on the secondary node as"
7849 " instance will not be started")
7851 # check bridge existance
7852 _CheckInstanceBridgesExist(self, instance, node=target_node)
7854 def Exec(self, feedback_fn):
7855 """Move an instance.
7857 The move is done by shutting it down on its present node, copying
7858 the data over (slow) and starting it on the new node.
7861 instance = self.instance
7863 source_node = instance.primary_node
7864 target_node = self.target_node
7866 self.LogInfo("Shutting down instance %s on source node %s",
7867 instance.name, source_node)
7869 assert (self.owned_locks(locking.LEVEL_NODE) ==
7870 self.owned_locks(locking.LEVEL_NODE_RES))
7872 result = self.rpc.call_instance_shutdown(source_node, instance,
7873 self.op.shutdown_timeout)
7874 msg = result.fail_msg
7876 if self.op.ignore_consistency:
7877 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7878 " Proceeding anyway. Please make sure node"
7879 " %s is down. Error details: %s",
7880 instance.name, source_node, source_node, msg)
7882 raise errors.OpExecError("Could not shutdown instance %s on"
7884 (instance.name, source_node, msg))
7886 # create the target disks
7888 _CreateDisks(self, instance, target_node=target_node)
7889 except errors.OpExecError:
7890 self.LogWarning("Device creation failed, reverting...")
7892 _RemoveDisks(self, instance, target_node=target_node)
7894 self.cfg.ReleaseDRBDMinors(instance.name)
7897 cluster_name = self.cfg.GetClusterInfo().cluster_name
7900 # activate, get path, copy the data over
7901 for idx, disk in enumerate(instance.disks):
7902 self.LogInfo("Copying data for disk %d", idx)
7903 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7904 instance.name, True, idx)
7906 self.LogWarning("Can't assemble newly created disk %d: %s",
7907 idx, result.fail_msg)
7908 errs.append(result.fail_msg)
7910 dev_path = result.payload
7911 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7912 target_node, dev_path,
7915 self.LogWarning("Can't copy data over for disk %d: %s",
7916 idx, result.fail_msg)
7917 errs.append(result.fail_msg)
7921 self.LogWarning("Some disks failed to copy, aborting")
7923 _RemoveDisks(self, instance, target_node=target_node)
7925 self.cfg.ReleaseDRBDMinors(instance.name)
7926 raise errors.OpExecError("Errors during disk copy: %s" %
7929 instance.primary_node = target_node
7930 self.cfg.Update(instance, feedback_fn)
7932 self.LogInfo("Removing the disks on the original node")
7933 _RemoveDisks(self, instance, target_node=source_node)
7935 # Only start the instance if it's marked as up
7936 if instance.admin_state == constants.ADMINST_UP:
7937 self.LogInfo("Starting instance %s on node %s",
7938 instance.name, target_node)
7940 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7941 ignore_secondaries=True)
7943 _ShutdownInstanceDisks(self, instance)
7944 raise errors.OpExecError("Can't activate the instance's disks")
7946 result = self.rpc.call_instance_start(target_node,
7947 (instance, None, None), False)
7948 msg = result.fail_msg
7950 _ShutdownInstanceDisks(self, instance)
7951 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7952 (instance.name, target_node, msg))
7955 class LUNodeMigrate(LogicalUnit):
7956 """Migrate all instances from a node.
7959 HPATH = "node-migrate"
7960 HTYPE = constants.HTYPE_NODE
7963 def CheckArguments(self):
7966 def ExpandNames(self):
7967 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7969 self.share_locks = _ShareAll()
7970 self.needed_locks = {
7971 locking.LEVEL_NODE: [self.op.node_name],
7974 def BuildHooksEnv(self):
7977 This runs on the master, the primary and all the secondaries.
7981 "NODE_NAME": self.op.node_name,
7982 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7985 def BuildHooksNodes(self):
7986 """Build hooks nodes.
7989 nl = [self.cfg.GetMasterNode()]
7992 def CheckPrereq(self):
7995 def Exec(self, feedback_fn):
7996 # Prepare jobs for migration instances
7997 allow_runtime_changes = self.op.allow_runtime_changes
7999 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8002 iallocator=self.op.iallocator,
8003 target_node=self.op.target_node,
8004 allow_runtime_changes=allow_runtime_changes,
8005 ignore_ipolicy=self.op.ignore_ipolicy)]
8006 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8009 # TODO: Run iallocator in this opcode and pass correct placement options to
8010 # OpInstanceMigrate. Since other jobs can modify the cluster between
8011 # running the iallocator and the actual migration, a good consistency model
8012 # will have to be found.
8014 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8015 frozenset([self.op.node_name]))
8017 return ResultWithJobs(jobs)
8020 class TLMigrateInstance(Tasklet):
8021 """Tasklet class for instance migration.
8024 @ivar live: whether the migration will be done live or non-live;
8025 this variable is initalized only after CheckPrereq has run
8026 @type cleanup: boolean
8027 @ivar cleanup: Wheater we cleanup from a failed migration
8028 @type iallocator: string
8029 @ivar iallocator: The iallocator used to determine target_node
8030 @type target_node: string
8031 @ivar target_node: If given, the target_node to reallocate the instance to
8032 @type failover: boolean
8033 @ivar failover: Whether operation results in failover or migration
8034 @type fallback: boolean
8035 @ivar fallback: Whether fallback to failover is allowed if migration not
8037 @type ignore_consistency: boolean
8038 @ivar ignore_consistency: Wheter we should ignore consistency between source
8040 @type shutdown_timeout: int
8041 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8042 @type ignore_ipolicy: bool
8043 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8048 _MIGRATION_POLL_INTERVAL = 1 # seconds
8049 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8051 def __init__(self, lu, instance_name, cleanup=False,
8052 failover=False, fallback=False,
8053 ignore_consistency=False,
8054 allow_runtime_changes=True,
8055 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8056 ignore_ipolicy=False):
8057 """Initializes this class.
8060 Tasklet.__init__(self, lu)
8063 self.instance_name = instance_name
8064 self.cleanup = cleanup
8065 self.live = False # will be overridden later
8066 self.failover = failover
8067 self.fallback = fallback
8068 self.ignore_consistency = ignore_consistency
8069 self.shutdown_timeout = shutdown_timeout
8070 self.ignore_ipolicy = ignore_ipolicy
8071 self.allow_runtime_changes = allow_runtime_changes
8073 def CheckPrereq(self):
8074 """Check prerequisites.
8076 This checks that the instance is in the cluster.
8079 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8080 instance = self.cfg.GetInstanceInfo(instance_name)
8081 assert instance is not None
8082 self.instance = instance
8083 cluster = self.cfg.GetClusterInfo()
8085 if (not self.cleanup and
8086 not instance.admin_state == constants.ADMINST_UP and
8087 not self.failover and self.fallback):
8088 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8089 " switching to failover")
8090 self.failover = True
8092 if instance.disk_template not in constants.DTS_MIRRORED:
8097 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8098 " %s" % (instance.disk_template, text),
8101 if instance.disk_template in constants.DTS_EXT_MIRROR:
8102 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8104 if self.lu.op.iallocator:
8105 self._RunAllocator()
8107 # We set set self.target_node as it is required by
8109 self.target_node = self.lu.op.target_node
8111 # Check that the target node is correct in terms of instance policy
8112 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8113 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8114 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8116 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8117 ignore=self.ignore_ipolicy)
8119 # self.target_node is already populated, either directly or by the
8121 target_node = self.target_node
8122 if self.target_node == instance.primary_node:
8123 raise errors.OpPrereqError("Cannot migrate instance %s"
8124 " to its primary (%s)" %
8125 (instance.name, instance.primary_node),
8128 if len(self.lu.tasklets) == 1:
8129 # It is safe to release locks only when we're the only tasklet
8131 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8132 keep=[instance.primary_node, self.target_node])
8135 secondary_nodes = instance.secondary_nodes
8136 if not secondary_nodes:
8137 raise errors.ConfigurationError("No secondary node but using"
8138 " %s disk template" %
8139 instance.disk_template)
8140 target_node = secondary_nodes[0]
8141 if self.lu.op.iallocator or (self.lu.op.target_node and
8142 self.lu.op.target_node != target_node):
8144 text = "failed over"
8147 raise errors.OpPrereqError("Instances with disk template %s cannot"
8148 " be %s to arbitrary nodes"
8149 " (neither an iallocator nor a target"
8150 " node can be passed)" %
8151 (instance.disk_template, text),
8153 nodeinfo = self.cfg.GetNodeInfo(target_node)
8154 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8155 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8157 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8158 ignore=self.ignore_ipolicy)
8160 i_be = cluster.FillBE(instance)
8162 # check memory requirements on the secondary node
8163 if (not self.cleanup and
8164 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8165 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8166 "migrating instance %s" %
8168 i_be[constants.BE_MINMEM],
8169 instance.hypervisor)
8171 self.lu.LogInfo("Not checking memory on the secondary node as"
8172 " instance will not be started")
8174 # check if failover must be forced instead of migration
8175 if (not self.cleanup and not self.failover and
8176 i_be[constants.BE_ALWAYS_FAILOVER]):
8177 self.lu.LogInfo("Instance configured to always failover; fallback"
8179 self.failover = True
8181 # check bridge existance
8182 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8184 if not self.cleanup:
8185 _CheckNodeNotDrained(self.lu, target_node)
8186 if not self.failover:
8187 result = self.rpc.call_instance_migratable(instance.primary_node,
8189 if result.fail_msg and self.fallback:
8190 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8192 self.failover = True
8194 result.Raise("Can't migrate, please use failover",
8195 prereq=True, ecode=errors.ECODE_STATE)
8197 assert not (self.failover and self.cleanup)
8199 if not self.failover:
8200 if self.lu.op.live is not None and self.lu.op.mode is not None:
8201 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8202 " parameters are accepted",
8204 if self.lu.op.live is not None:
8206 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8208 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8209 # reset the 'live' parameter to None so that repeated
8210 # invocations of CheckPrereq do not raise an exception
8211 self.lu.op.live = None
8212 elif self.lu.op.mode is None:
8213 # read the default value from the hypervisor
8214 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8215 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8217 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8219 # Failover is never live
8222 if not (self.failover or self.cleanup):
8223 remote_info = self.rpc.call_instance_info(instance.primary_node,
8225 instance.hypervisor)
8226 remote_info.Raise("Error checking instance on node %s" %
8227 instance.primary_node)
8228 instance_running = bool(remote_info.payload)
8229 if instance_running:
8230 self.current_mem = int(remote_info.payload["memory"])
8232 def _RunAllocator(self):
8233 """Run the allocator based on input opcode.
8236 # FIXME: add a self.ignore_ipolicy option
8237 req = iallocator.IAReqRelocate(name=self.instance_name,
8238 relocate_from=[self.instance.primary_node])
8239 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8241 ial.Run(self.lu.op.iallocator)
8244 raise errors.OpPrereqError("Can't compute nodes using"
8245 " iallocator '%s': %s" %
8246 (self.lu.op.iallocator, ial.info),
8248 self.target_node = ial.result[0]
8249 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8250 self.instance_name, self.lu.op.iallocator,
8251 utils.CommaJoin(ial.result))
8253 def _WaitUntilSync(self):
8254 """Poll with custom rpc for disk sync.
8256 This uses our own step-based rpc call.
8259 self.feedback_fn("* wait until resync is done")
8263 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8265 (self.instance.disks,
8268 for node, nres in result.items():
8269 nres.Raise("Cannot resync disks on node %s" % node)
8270 node_done, node_percent = nres.payload
8271 all_done = all_done and node_done
8272 if node_percent is not None:
8273 min_percent = min(min_percent, node_percent)
8275 if min_percent < 100:
8276 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8279 def _EnsureSecondary(self, node):
8280 """Demote a node to secondary.
8283 self.feedback_fn("* switching node %s to secondary mode" % node)
8285 for dev in self.instance.disks:
8286 self.cfg.SetDiskID(dev, node)
8288 result = self.rpc.call_blockdev_close(node, self.instance.name,
8289 self.instance.disks)
8290 result.Raise("Cannot change disk to secondary on node %s" % node)
8292 def _GoStandalone(self):
8293 """Disconnect from the network.
8296 self.feedback_fn("* changing into standalone mode")
8297 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8298 self.instance.disks)
8299 for node, nres in result.items():
8300 nres.Raise("Cannot disconnect disks node %s" % node)
8302 def _GoReconnect(self, multimaster):
8303 """Reconnect to the network.
8309 msg = "single-master"
8310 self.feedback_fn("* changing disks into %s mode" % msg)
8311 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8312 (self.instance.disks, self.instance),
8313 self.instance.name, multimaster)
8314 for node, nres in result.items():
8315 nres.Raise("Cannot change disks config on node %s" % node)
8317 def _ExecCleanup(self):
8318 """Try to cleanup after a failed migration.
8320 The cleanup is done by:
8321 - check that the instance is running only on one node
8322 (and update the config if needed)
8323 - change disks on its secondary node to secondary
8324 - wait until disks are fully synchronized
8325 - disconnect from the network
8326 - change disks into single-master mode
8327 - wait again until disks are fully synchronized
8330 instance = self.instance
8331 target_node = self.target_node
8332 source_node = self.source_node
8334 # check running on only one node
8335 self.feedback_fn("* checking where the instance actually runs"
8336 " (if this hangs, the hypervisor might be in"
8338 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8339 for node, result in ins_l.items():
8340 result.Raise("Can't contact node %s" % node)
8342 runningon_source = instance.name in ins_l[source_node].payload
8343 runningon_target = instance.name in ins_l[target_node].payload
8345 if runningon_source and runningon_target:
8346 raise errors.OpExecError("Instance seems to be running on two nodes,"
8347 " or the hypervisor is confused; you will have"
8348 " to ensure manually that it runs only on one"
8349 " and restart this operation")
8351 if not (runningon_source or runningon_target):
8352 raise errors.OpExecError("Instance does not seem to be running at all;"
8353 " in this case it's safer to repair by"
8354 " running 'gnt-instance stop' to ensure disk"
8355 " shutdown, and then restarting it")
8357 if runningon_target:
8358 # the migration has actually succeeded, we need to update the config
8359 self.feedback_fn("* instance running on secondary node (%s),"
8360 " updating config" % target_node)
8361 instance.primary_node = target_node
8362 self.cfg.Update(instance, self.feedback_fn)
8363 demoted_node = source_node
8365 self.feedback_fn("* instance confirmed to be running on its"
8366 " primary node (%s)" % source_node)
8367 demoted_node = target_node
8369 if instance.disk_template in constants.DTS_INT_MIRROR:
8370 self._EnsureSecondary(demoted_node)
8372 self._WaitUntilSync()
8373 except errors.OpExecError:
8374 # we ignore here errors, since if the device is standalone, it
8375 # won't be able to sync
8377 self._GoStandalone()
8378 self._GoReconnect(False)
8379 self._WaitUntilSync()
8381 self.feedback_fn("* done")
8383 def _RevertDiskStatus(self):
8384 """Try to revert the disk status after a failed migration.
8387 target_node = self.target_node
8388 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8392 self._EnsureSecondary(target_node)
8393 self._GoStandalone()
8394 self._GoReconnect(False)
8395 self._WaitUntilSync()
8396 except errors.OpExecError, err:
8397 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8398 " please try to recover the instance manually;"
8399 " error '%s'" % str(err))
8401 def _AbortMigration(self):
8402 """Call the hypervisor code to abort a started migration.
8405 instance = self.instance
8406 target_node = self.target_node
8407 source_node = self.source_node
8408 migration_info = self.migration_info
8410 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8414 abort_msg = abort_result.fail_msg
8416 logging.error("Aborting migration failed on target node %s: %s",
8417 target_node, abort_msg)
8418 # Don't raise an exception here, as we stil have to try to revert the
8419 # disk status, even if this step failed.
8421 abort_result = self.rpc.call_instance_finalize_migration_src(
8422 source_node, instance, False, self.live)
8423 abort_msg = abort_result.fail_msg
8425 logging.error("Aborting migration failed on source node %s: %s",
8426 source_node, abort_msg)
8428 def _ExecMigration(self):
8429 """Migrate an instance.
8431 The migrate is done by:
8432 - change the disks into dual-master mode
8433 - wait until disks are fully synchronized again
8434 - migrate the instance
8435 - change disks on the new secondary node (the old primary) to secondary
8436 - wait until disks are fully synchronized
8437 - change disks into single-master mode
8440 instance = self.instance
8441 target_node = self.target_node
8442 source_node = self.source_node
8444 # Check for hypervisor version mismatch and warn the user.
8445 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8446 None, [self.instance.hypervisor])
8447 for ninfo in nodeinfo.values():
8448 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8450 (_, _, (src_info, )) = nodeinfo[source_node].payload
8451 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8453 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8454 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8455 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8456 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8457 if src_version != dst_version:
8458 self.feedback_fn("* warning: hypervisor version mismatch between"
8459 " source (%s) and target (%s) node" %
8460 (src_version, dst_version))
8462 self.feedback_fn("* checking disk consistency between source and target")
8463 for (idx, dev) in enumerate(instance.disks):
8464 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8465 raise errors.OpExecError("Disk %s is degraded or not fully"
8466 " synchronized on target node,"
8467 " aborting migration" % idx)
8469 if self.current_mem > self.tgt_free_mem:
8470 if not self.allow_runtime_changes:
8471 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8472 " free memory to fit instance %s on target"
8473 " node %s (have %dMB, need %dMB)" %
8474 (instance.name, target_node,
8475 self.tgt_free_mem, self.current_mem))
8476 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8477 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8480 rpcres.Raise("Cannot modify instance runtime memory")
8482 # First get the migration information from the remote node
8483 result = self.rpc.call_migration_info(source_node, instance)
8484 msg = result.fail_msg
8486 log_err = ("Failed fetching source migration information from %s: %s" %
8488 logging.error(log_err)
8489 raise errors.OpExecError(log_err)
8491 self.migration_info = migration_info = result.payload
8493 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8494 # Then switch the disks to master/master mode
8495 self._EnsureSecondary(target_node)
8496 self._GoStandalone()
8497 self._GoReconnect(True)
8498 self._WaitUntilSync()
8500 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8501 result = self.rpc.call_accept_instance(target_node,
8504 self.nodes_ip[target_node])
8506 msg = result.fail_msg
8508 logging.error("Instance pre-migration failed, trying to revert"
8509 " disk status: %s", msg)
8510 self.feedback_fn("Pre-migration failed, aborting")
8511 self._AbortMigration()
8512 self._RevertDiskStatus()
8513 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8514 (instance.name, msg))
8516 self.feedback_fn("* migrating instance to %s" % target_node)
8517 result = self.rpc.call_instance_migrate(source_node, instance,
8518 self.nodes_ip[target_node],
8520 msg = result.fail_msg
8522 logging.error("Instance migration failed, trying to revert"
8523 " disk status: %s", msg)
8524 self.feedback_fn("Migration failed, aborting")
8525 self._AbortMigration()
8526 self._RevertDiskStatus()
8527 raise errors.OpExecError("Could not migrate instance %s: %s" %
8528 (instance.name, msg))
8530 self.feedback_fn("* starting memory transfer")
8531 last_feedback = time.time()
8533 result = self.rpc.call_instance_get_migration_status(source_node,
8535 msg = result.fail_msg
8536 ms = result.payload # MigrationStatus instance
8537 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8538 logging.error("Instance migration failed, trying to revert"
8539 " disk status: %s", msg)
8540 self.feedback_fn("Migration failed, aborting")
8541 self._AbortMigration()
8542 self._RevertDiskStatus()
8543 raise errors.OpExecError("Could not migrate instance %s: %s" %
8544 (instance.name, msg))
8546 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8547 self.feedback_fn("* memory transfer complete")
8550 if (utils.TimeoutExpired(last_feedback,
8551 self._MIGRATION_FEEDBACK_INTERVAL) and
8552 ms.transferred_ram is not None):
8553 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8554 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8555 last_feedback = time.time()
8557 time.sleep(self._MIGRATION_POLL_INTERVAL)
8559 result = self.rpc.call_instance_finalize_migration_src(source_node,
8563 msg = result.fail_msg
8565 logging.error("Instance migration succeeded, but finalization failed"
8566 " on the source node: %s", msg)
8567 raise errors.OpExecError("Could not finalize instance migration: %s" %
8570 instance.primary_node = target_node
8572 # distribute new instance config to the other nodes
8573 self.cfg.Update(instance, self.feedback_fn)
8575 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8579 msg = result.fail_msg
8581 logging.error("Instance migration succeeded, but finalization failed"
8582 " on the target node: %s", msg)
8583 raise errors.OpExecError("Could not finalize instance migration: %s" %
8586 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8587 self._EnsureSecondary(source_node)
8588 self._WaitUntilSync()
8589 self._GoStandalone()
8590 self._GoReconnect(False)
8591 self._WaitUntilSync()
8593 # If the instance's disk template is `rbd' and there was a successful
8594 # migration, unmap the device from the source node.
8595 if self.instance.disk_template == constants.DT_RBD:
8596 disks = _ExpandCheckDisks(instance, instance.disks)
8597 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8599 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8600 msg = result.fail_msg
8602 logging.error("Migration was successful, but couldn't unmap the"
8603 " block device %s on source node %s: %s",
8604 disk.iv_name, source_node, msg)
8605 logging.error("You need to unmap the device %s manually on %s",
8606 disk.iv_name, source_node)
8608 self.feedback_fn("* done")
8610 def _ExecFailover(self):
8611 """Failover an instance.
8613 The failover is done by shutting it down on its present node and
8614 starting it on the secondary.
8617 instance = self.instance
8618 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8620 source_node = instance.primary_node
8621 target_node = self.target_node
8623 if instance.admin_state == constants.ADMINST_UP:
8624 self.feedback_fn("* checking disk consistency between source and target")
8625 for (idx, dev) in enumerate(instance.disks):
8626 # for drbd, these are drbd over lvm
8627 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8629 if primary_node.offline:
8630 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8632 (primary_node.name, idx, target_node))
8633 elif not self.ignore_consistency:
8634 raise errors.OpExecError("Disk %s is degraded on target node,"
8635 " aborting failover" % idx)
8637 self.feedback_fn("* not checking disk consistency as instance is not"
8640 self.feedback_fn("* shutting down instance on source node")
8641 logging.info("Shutting down instance %s on node %s",
8642 instance.name, source_node)
8644 result = self.rpc.call_instance_shutdown(source_node, instance,
8645 self.shutdown_timeout)
8646 msg = result.fail_msg
8648 if self.ignore_consistency or primary_node.offline:
8649 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8650 " proceeding anyway; please make sure node"
8651 " %s is down; error details: %s",
8652 instance.name, source_node, source_node, msg)
8654 raise errors.OpExecError("Could not shutdown instance %s on"
8656 (instance.name, source_node, msg))
8658 self.feedback_fn("* deactivating the instance's disks on source node")
8659 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8660 raise errors.OpExecError("Can't shut down the instance's disks")
8662 instance.primary_node = target_node
8663 # distribute new instance config to the other nodes
8664 self.cfg.Update(instance, self.feedback_fn)
8666 # Only start the instance if it's marked as up
8667 if instance.admin_state == constants.ADMINST_UP:
8668 self.feedback_fn("* activating the instance's disks on target node %s" %
8670 logging.info("Starting instance %s on node %s",
8671 instance.name, target_node)
8673 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8674 ignore_secondaries=True)
8676 _ShutdownInstanceDisks(self.lu, instance)
8677 raise errors.OpExecError("Can't activate the instance's disks")
8679 self.feedback_fn("* starting the instance on the target node %s" %
8681 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8683 msg = result.fail_msg
8685 _ShutdownInstanceDisks(self.lu, instance)
8686 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8687 (instance.name, target_node, msg))
8689 def Exec(self, feedback_fn):
8690 """Perform the migration.
8693 self.feedback_fn = feedback_fn
8694 self.source_node = self.instance.primary_node
8696 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8697 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8698 self.target_node = self.instance.secondary_nodes[0]
8699 # Otherwise self.target_node has been populated either
8700 # directly, or through an iallocator.
8702 self.all_nodes = [self.source_node, self.target_node]
8703 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8704 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8707 feedback_fn("Failover instance %s" % self.instance.name)
8708 self._ExecFailover()
8710 feedback_fn("Migrating instance %s" % self.instance.name)
8713 return self._ExecCleanup()
8715 return self._ExecMigration()
8718 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8720 """Wrapper around L{_CreateBlockDevInner}.
8722 This method annotates the root device first.
8725 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8726 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8730 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8732 """Create a tree of block devices on a given node.
8734 If this device type has to be created on secondaries, create it and
8737 If not, just recurse to children keeping the same 'force' value.
8739 @attention: The device has to be annotated already.
8741 @param lu: the lu on whose behalf we execute
8742 @param node: the node on which to create the device
8743 @type instance: L{objects.Instance}
8744 @param instance: the instance which owns the device
8745 @type device: L{objects.Disk}
8746 @param device: the device to create
8747 @type force_create: boolean
8748 @param force_create: whether to force creation of this device; this
8749 will be change to True whenever we find a device which has
8750 CreateOnSecondary() attribute
8751 @param info: the extra 'metadata' we should attach to the device
8752 (this will be represented as a LVM tag)
8753 @type force_open: boolean
8754 @param force_open: this parameter will be passes to the
8755 L{backend.BlockdevCreate} function where it specifies
8756 whether we run on primary or not, and it affects both
8757 the child assembly and the device own Open() execution
8760 if device.CreateOnSecondary():
8764 for child in device.children:
8765 _CreateBlockDevInner(lu, node, instance, child, force_create,
8768 if not force_create:
8771 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8774 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8775 """Create a single block device on a given node.
8777 This will not recurse over children of the device, so they must be
8780 @param lu: the lu on whose behalf we execute
8781 @param node: the node on which to create the device
8782 @type instance: L{objects.Instance}
8783 @param instance: the instance which owns the device
8784 @type device: L{objects.Disk}
8785 @param device: the device to create
8786 @param info: the extra 'metadata' we should attach to the device
8787 (this will be represented as a LVM tag)
8788 @type force_open: boolean
8789 @param force_open: this parameter will be passes to the
8790 L{backend.BlockdevCreate} function where it specifies
8791 whether we run on primary or not, and it affects both
8792 the child assembly and the device own Open() execution
8795 lu.cfg.SetDiskID(device, node)
8796 result = lu.rpc.call_blockdev_create(node, device, device.size,
8797 instance.name, force_open, info)
8798 result.Raise("Can't create block device %s on"
8799 " node %s for instance %s" % (device, node, instance.name))
8800 if device.physical_id is None:
8801 device.physical_id = result.payload
8804 def _GenerateUniqueNames(lu, exts):
8805 """Generate a suitable LV name.
8807 This will generate a logical volume name for the given instance.
8812 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8813 results.append("%s%s" % (new_id, val))
8817 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8818 iv_name, p_minor, s_minor):
8819 """Generate a drbd8 device complete with its children.
8822 assert len(vgnames) == len(names) == 2
8823 port = lu.cfg.AllocatePort()
8824 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8826 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8827 logical_id=(vgnames[0], names[0]),
8829 dev_meta = objects.Disk(dev_type=constants.LD_LV,
8830 size=constants.DRBD_META_SIZE,
8831 logical_id=(vgnames[1], names[1]),
8833 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8834 logical_id=(primary, secondary, port,
8837 children=[dev_data, dev_meta],
8838 iv_name=iv_name, params={})
8842 _DISK_TEMPLATE_NAME_PREFIX = {
8843 constants.DT_PLAIN: "",
8844 constants.DT_RBD: ".rbd",
8848 _DISK_TEMPLATE_DEVICE_TYPE = {
8849 constants.DT_PLAIN: constants.LD_LV,
8850 constants.DT_FILE: constants.LD_FILE,
8851 constants.DT_SHARED_FILE: constants.LD_FILE,
8852 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8853 constants.DT_RBD: constants.LD_RBD,
8857 def _GenerateDiskTemplate(
8858 lu, template_name, instance_name, primary_node, secondary_nodes,
8859 disk_info, file_storage_dir, file_driver, base_index,
8860 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8861 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8862 """Generate the entire disk layout for a given template type.
8865 #TODO: compute space requirements
8867 vgname = lu.cfg.GetVGName()
8868 disk_count = len(disk_info)
8871 if template_name == constants.DT_DISKLESS:
8873 elif template_name == constants.DT_DRBD8:
8874 if len(secondary_nodes) != 1:
8875 raise errors.ProgrammerError("Wrong template configuration")
8876 remote_node = secondary_nodes[0]
8877 minors = lu.cfg.AllocateDRBDMinor(
8878 [primary_node, remote_node] * len(disk_info), instance_name)
8880 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8882 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8885 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8886 for i in range(disk_count)]):
8887 names.append(lv_prefix + "_data")
8888 names.append(lv_prefix + "_meta")
8889 for idx, disk in enumerate(disk_info):
8890 disk_index = idx + base_index
8891 data_vg = disk.get(constants.IDISK_VG, vgname)
8892 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8893 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8894 disk[constants.IDISK_SIZE],
8896 names[idx * 2:idx * 2 + 2],
8897 "disk/%d" % disk_index,
8898 minors[idx * 2], minors[idx * 2 + 1])
8899 disk_dev.mode = disk[constants.IDISK_MODE]
8900 disks.append(disk_dev)
8903 raise errors.ProgrammerError("Wrong template configuration")
8905 if template_name == constants.DT_FILE:
8907 elif template_name == constants.DT_SHARED_FILE:
8908 _req_shr_file_storage()
8910 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8911 if name_prefix is None:
8914 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8915 (name_prefix, base_index + i)
8916 for i in range(disk_count)])
8918 if template_name == constants.DT_PLAIN:
8919 def logical_id_fn(idx, _, disk):
8920 vg = disk.get(constants.IDISK_VG, vgname)
8921 return (vg, names[idx])
8922 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8924 lambda _, disk_index, disk: (file_driver,
8925 "%s/disk%d" % (file_storage_dir,
8927 elif template_name == constants.DT_BLOCK:
8929 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8930 disk[constants.IDISK_ADOPT])
8931 elif template_name == constants.DT_RBD:
8932 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8934 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8936 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8938 for idx, disk in enumerate(disk_info):
8939 disk_index = idx + base_index
8940 size = disk[constants.IDISK_SIZE]
8941 feedback_fn("* disk %s, size %s" %
8942 (disk_index, utils.FormatUnit(size, "h")))
8943 disks.append(objects.Disk(dev_type=dev_type, size=size,
8944 logical_id=logical_id_fn(idx, disk_index, disk),
8945 iv_name="disk/%d" % disk_index,
8946 mode=disk[constants.IDISK_MODE],
8952 def _GetInstanceInfoText(instance):
8953 """Compute that text that should be added to the disk's metadata.
8956 return "originstname+%s" % instance.name
8959 def _CalcEta(time_taken, written, total_size):
8960 """Calculates the ETA based on size written and total size.
8962 @param time_taken: The time taken so far
8963 @param written: amount written so far
8964 @param total_size: The total size of data to be written
8965 @return: The remaining time in seconds
8968 avg_time = time_taken / float(written)
8969 return (total_size - written) * avg_time
8972 def _WipeDisks(lu, instance):
8973 """Wipes instance disks.
8975 @type lu: L{LogicalUnit}
8976 @param lu: the logical unit on whose behalf we execute
8977 @type instance: L{objects.Instance}
8978 @param instance: the instance whose disks we should create
8979 @return: the success of the wipe
8982 node = instance.primary_node
8984 for device in instance.disks:
8985 lu.cfg.SetDiskID(device, node)
8987 logging.info("Pause sync of instance %s disks", instance.name)
8988 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8989 (instance.disks, instance),
8991 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8993 for idx, success in enumerate(result.payload):
8995 logging.warn("pause-sync of instance %s for disks %d failed",
8999 for idx, device in enumerate(instance.disks):
9000 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9001 # MAX_WIPE_CHUNK at max
9002 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
9003 constants.MIN_WIPE_CHUNK_PERCENT)
9004 # we _must_ make this an int, otherwise rounding errors will
9006 wipe_chunk_size = int(wipe_chunk_size)
9008 lu.LogInfo("* Wiping disk %d", idx)
9009 logging.info("Wiping disk %d for instance %s, node %s using"
9010 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9015 start_time = time.time()
9017 while offset < size:
9018 wipe_size = min(wipe_chunk_size, size - offset)
9019 logging.debug("Wiping disk %d, offset %s, chunk %s",
9020 idx, offset, wipe_size)
9021 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9023 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9024 (idx, offset, wipe_size))
9027 if now - last_output >= 60:
9028 eta = _CalcEta(now - start_time, offset, size)
9029 lu.LogInfo(" - done: %.1f%% ETA: %s" %
9030 (offset / float(size) * 100, utils.FormatSeconds(eta)))
9033 logging.info("Resume sync of instance %s disks", instance.name)
9035 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9036 (instance.disks, instance),
9040 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
9041 " please have a look at the status and troubleshoot"
9042 " the issue: %s", node, result.fail_msg)
9044 for idx, success in enumerate(result.payload):
9046 lu.LogWarning("Resume sync of disk %d failed, please have a"
9047 " look at the status and troubleshoot the issue", idx)
9048 logging.warn("resume-sync of instance %s for disks %d failed",
9052 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9053 """Create all disks for an instance.
9055 This abstracts away some work from AddInstance.
9057 @type lu: L{LogicalUnit}
9058 @param lu: the logical unit on whose behalf we execute
9059 @type instance: L{objects.Instance}
9060 @param instance: the instance whose disks we should create
9062 @param to_skip: list of indices to skip
9063 @type target_node: string
9064 @param target_node: if passed, overrides the target node for creation
9066 @return: the success of the creation
9069 info = _GetInstanceInfoText(instance)
9070 if target_node is None:
9071 pnode = instance.primary_node
9072 all_nodes = instance.all_nodes
9077 if instance.disk_template in constants.DTS_FILEBASED:
9078 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9079 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9081 result.Raise("Failed to create directory '%s' on"
9082 " node %s" % (file_storage_dir, pnode))
9084 # Note: this needs to be kept in sync with adding of disks in
9085 # LUInstanceSetParams
9086 for idx, device in enumerate(instance.disks):
9087 if to_skip and idx in to_skip:
9089 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9091 for node in all_nodes:
9092 f_create = node == pnode
9093 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9096 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9097 """Remove all disks for an instance.
9099 This abstracts away some work from `AddInstance()` and
9100 `RemoveInstance()`. Note that in case some of the devices couldn't
9101 be removed, the removal will continue with the other ones (compare
9102 with `_CreateDisks()`).
9104 @type lu: L{LogicalUnit}
9105 @param lu: the logical unit on whose behalf we execute
9106 @type instance: L{objects.Instance}
9107 @param instance: the instance whose disks we should remove
9108 @type target_node: string
9109 @param target_node: used to override the node on which to remove the disks
9111 @return: the success of the removal
9114 logging.info("Removing block devices for instance %s", instance.name)
9117 ports_to_release = set()
9118 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9119 for (idx, device) in enumerate(anno_disks):
9121 edata = [(target_node, device)]
9123 edata = device.ComputeNodeTree(instance.primary_node)
9124 for node, disk in edata:
9125 lu.cfg.SetDiskID(disk, node)
9126 result = lu.rpc.call_blockdev_remove(node, disk)
9128 lu.LogWarning("Could not remove disk %s on node %s,"
9129 " continuing anyway: %s", idx, node, result.fail_msg)
9130 if not (result.offline and node != instance.primary_node):
9133 # if this is a DRBD disk, return its port to the pool
9134 if device.dev_type in constants.LDS_DRBD:
9135 ports_to_release.add(device.logical_id[2])
9137 if all_result or ignore_failures:
9138 for port in ports_to_release:
9139 lu.cfg.AddTcpUdpPort(port)
9141 if instance.disk_template == constants.DT_FILE:
9142 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9146 tgt = instance.primary_node
9147 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9149 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9150 file_storage_dir, instance.primary_node, result.fail_msg)
9156 def _ComputeDiskSizePerVG(disk_template, disks):
9157 """Compute disk size requirements in the volume group
9160 def _compute(disks, payload):
9161 """Universal algorithm.
9166 vgs[disk[constants.IDISK_VG]] = \
9167 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9171 # Required free disk space as a function of disk and swap space
9173 constants.DT_DISKLESS: {},
9174 constants.DT_PLAIN: _compute(disks, 0),
9175 # 128 MB are added for drbd metadata for each disk
9176 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9177 constants.DT_FILE: {},
9178 constants.DT_SHARED_FILE: {},
9181 if disk_template not in req_size_dict:
9182 raise errors.ProgrammerError("Disk template '%s' size requirement"
9183 " is unknown" % disk_template)
9185 return req_size_dict[disk_template]
9188 def _FilterVmNodes(lu, nodenames):
9189 """Filters out non-vm_capable nodes from a list.
9191 @type lu: L{LogicalUnit}
9192 @param lu: the logical unit for which we check
9193 @type nodenames: list
9194 @param nodenames: the list of nodes on which we should check
9196 @return: the list of vm-capable nodes
9199 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9200 return [name for name in nodenames if name not in vm_nodes]
9203 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9204 """Hypervisor parameter validation.
9206 This function abstract the hypervisor parameter validation to be
9207 used in both instance create and instance modify.
9209 @type lu: L{LogicalUnit}
9210 @param lu: the logical unit for which we check
9211 @type nodenames: list
9212 @param nodenames: the list of nodes on which we should check
9213 @type hvname: string
9214 @param hvname: the name of the hypervisor we should use
9215 @type hvparams: dict
9216 @param hvparams: the parameters which we need to check
9217 @raise errors.OpPrereqError: if the parameters are not valid
9220 nodenames = _FilterVmNodes(lu, nodenames)
9222 cluster = lu.cfg.GetClusterInfo()
9223 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9225 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9226 for node in nodenames:
9230 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9233 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9234 """OS parameters validation.
9236 @type lu: L{LogicalUnit}
9237 @param lu: the logical unit for which we check
9238 @type required: boolean
9239 @param required: whether the validation should fail if the OS is not
9241 @type nodenames: list
9242 @param nodenames: the list of nodes on which we should check
9243 @type osname: string
9244 @param osname: the name of the hypervisor we should use
9245 @type osparams: dict
9246 @param osparams: the parameters which we need to check
9247 @raise errors.OpPrereqError: if the parameters are not valid
9250 nodenames = _FilterVmNodes(lu, nodenames)
9251 result = lu.rpc.call_os_validate(nodenames, required, osname,
9252 [constants.OS_VALIDATE_PARAMETERS],
9254 for node, nres in result.items():
9255 # we don't check for offline cases since this should be run only
9256 # against the master node and/or an instance's nodes
9257 nres.Raise("OS Parameters validation failed on node %s" % node)
9258 if not nres.payload:
9259 lu.LogInfo("OS %s not found on node %s, validation skipped",
9263 class LUInstanceCreate(LogicalUnit):
9264 """Create an instance.
9267 HPATH = "instance-add"
9268 HTYPE = constants.HTYPE_INSTANCE
9271 def CheckArguments(self):
9275 # do not require name_check to ease forward/backward compatibility
9277 if self.op.no_install and self.op.start:
9278 self.LogInfo("No-installation mode selected, disabling startup")
9279 self.op.start = False
9280 # validate/normalize the instance name
9281 self.op.instance_name = \
9282 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9284 if self.op.ip_check and not self.op.name_check:
9285 # TODO: make the ip check more flexible and not depend on the name check
9286 raise errors.OpPrereqError("Cannot do IP address check without a name"
9287 " check", errors.ECODE_INVAL)
9289 # check nics' parameter names
9290 for nic in self.op.nics:
9291 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9293 # check disks. parameter names and consistent adopt/no-adopt strategy
9294 has_adopt = has_no_adopt = False
9295 for disk in self.op.disks:
9296 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9297 if constants.IDISK_ADOPT in disk:
9301 if has_adopt and has_no_adopt:
9302 raise errors.OpPrereqError("Either all disks are adopted or none is",
9305 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9306 raise errors.OpPrereqError("Disk adoption is not supported for the"
9307 " '%s' disk template" %
9308 self.op.disk_template,
9310 if self.op.iallocator is not None:
9311 raise errors.OpPrereqError("Disk adoption not allowed with an"
9312 " iallocator script", errors.ECODE_INVAL)
9313 if self.op.mode == constants.INSTANCE_IMPORT:
9314 raise errors.OpPrereqError("Disk adoption not allowed for"
9315 " instance import", errors.ECODE_INVAL)
9317 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9318 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9319 " but no 'adopt' parameter given" %
9320 self.op.disk_template,
9323 self.adopt_disks = has_adopt
9325 # instance name verification
9326 if self.op.name_check:
9327 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9328 self.op.instance_name = self.hostname1.name
9329 # used in CheckPrereq for ip ping check
9330 self.check_ip = self.hostname1.ip
9332 self.check_ip = None
9334 # file storage checks
9335 if (self.op.file_driver and
9336 not self.op.file_driver in constants.FILE_DRIVER):
9337 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9338 self.op.file_driver, errors.ECODE_INVAL)
9340 if self.op.disk_template == constants.DT_FILE:
9341 opcodes.RequireFileStorage()
9342 elif self.op.disk_template == constants.DT_SHARED_FILE:
9343 opcodes.RequireSharedFileStorage()
9345 ### Node/iallocator related checks
9346 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9348 if self.op.pnode is not None:
9349 if self.op.disk_template in constants.DTS_INT_MIRROR:
9350 if self.op.snode is None:
9351 raise errors.OpPrereqError("The networked disk templates need"
9352 " a mirror node", errors.ECODE_INVAL)
9354 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9356 self.op.snode = None
9358 self._cds = _GetClusterDomainSecret()
9360 if self.op.mode == constants.INSTANCE_IMPORT:
9361 # On import force_variant must be True, because if we forced it at
9362 # initial install, our only chance when importing it back is that it
9364 self.op.force_variant = True
9366 if self.op.no_install:
9367 self.LogInfo("No-installation mode has no effect during import")
9369 elif self.op.mode == constants.INSTANCE_CREATE:
9370 if self.op.os_type is None:
9371 raise errors.OpPrereqError("No guest OS specified",
9373 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9374 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9375 " installation" % self.op.os_type,
9377 if self.op.disk_template is None:
9378 raise errors.OpPrereqError("No disk template specified",
9381 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9382 # Check handshake to ensure both clusters have the same domain secret
9383 src_handshake = self.op.source_handshake
9384 if not src_handshake:
9385 raise errors.OpPrereqError("Missing source handshake",
9388 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9391 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9394 # Load and check source CA
9395 self.source_x509_ca_pem = self.op.source_x509_ca
9396 if not self.source_x509_ca_pem:
9397 raise errors.OpPrereqError("Missing source X509 CA",
9401 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9403 except OpenSSL.crypto.Error, err:
9404 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9405 (err, ), errors.ECODE_INVAL)
9407 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9408 if errcode is not None:
9409 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9412 self.source_x509_ca = cert
9414 src_instance_name = self.op.source_instance_name
9415 if not src_instance_name:
9416 raise errors.OpPrereqError("Missing source instance name",
9419 self.source_instance_name = \
9420 netutils.GetHostname(name=src_instance_name).name
9423 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9424 self.op.mode, errors.ECODE_INVAL)
9426 def ExpandNames(self):
9427 """ExpandNames for CreateInstance.
9429 Figure out the right locks for instance creation.
9432 self.needed_locks = {}
9434 instance_name = self.op.instance_name
9435 # this is just a preventive check, but someone might still add this
9436 # instance in the meantime, and creation will fail at lock-add time
9437 if instance_name in self.cfg.GetInstanceList():
9438 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9439 instance_name, errors.ECODE_EXISTS)
9441 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9443 if self.op.iallocator:
9444 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9445 # specifying a group on instance creation and then selecting nodes from
9447 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9448 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9450 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9451 nodelist = [self.op.pnode]
9452 if self.op.snode is not None:
9453 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9454 nodelist.append(self.op.snode)
9455 self.needed_locks[locking.LEVEL_NODE] = nodelist
9456 # Lock resources of instance's primary and secondary nodes (copy to
9457 # prevent accidential modification)
9458 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9460 # in case of import lock the source node too
9461 if self.op.mode == constants.INSTANCE_IMPORT:
9462 src_node = self.op.src_node
9463 src_path = self.op.src_path
9465 if src_path is None:
9466 self.op.src_path = src_path = self.op.instance_name
9468 if src_node is None:
9469 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9470 self.op.src_node = None
9471 if os.path.isabs(src_path):
9472 raise errors.OpPrereqError("Importing an instance from a path"
9473 " requires a source node option",
9476 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9477 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9478 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9479 if not os.path.isabs(src_path):
9480 self.op.src_path = src_path = \
9481 utils.PathJoin(constants.EXPORT_DIR, src_path)
9483 def _RunAllocator(self):
9484 """Run the allocator based on input opcode.
9487 nics = [n.ToDict() for n in self.nics]
9488 memory = self.be_full[constants.BE_MAXMEM]
9489 spindle_use = self.be_full[constants.BE_SPINDLE_USE]
9490 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
9491 disk_template=self.op.disk_template,
9494 vcpus=self.be_full[constants.BE_VCPUS],
9496 spindle_use=spindle_use,
9499 hypervisor=self.op.hypervisor)
9500 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9502 ial.Run(self.op.iallocator)
9505 raise errors.OpPrereqError("Can't compute nodes using"
9506 " iallocator '%s': %s" %
9507 (self.op.iallocator, ial.info),
9509 self.op.pnode = ial.result[0]
9510 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9511 self.op.instance_name, self.op.iallocator,
9512 utils.CommaJoin(ial.result))
9514 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9516 if req.RequiredNodes() == 2:
9517 self.op.snode = ial.result[1]
9519 def BuildHooksEnv(self):
9522 This runs on master, primary and secondary nodes of the instance.
9526 "ADD_MODE": self.op.mode,
9528 if self.op.mode == constants.INSTANCE_IMPORT:
9529 env["SRC_NODE"] = self.op.src_node
9530 env["SRC_PATH"] = self.op.src_path
9531 env["SRC_IMAGES"] = self.src_images
9533 env.update(_BuildInstanceHookEnv(
9534 name=self.op.instance_name,
9535 primary_node=self.op.pnode,
9536 secondary_nodes=self.secondaries,
9537 status=self.op.start,
9538 os_type=self.op.os_type,
9539 minmem=self.be_full[constants.BE_MINMEM],
9540 maxmem=self.be_full[constants.BE_MAXMEM],
9541 vcpus=self.be_full[constants.BE_VCPUS],
9542 nics=_NICListToTuple(self, self.nics),
9543 disk_template=self.op.disk_template,
9544 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9545 for d in self.disks],
9548 hypervisor_name=self.op.hypervisor,
9554 def BuildHooksNodes(self):
9555 """Build hooks nodes.
9558 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9561 def _ReadExportInfo(self):
9562 """Reads the export information from disk.
9564 It will override the opcode source node and path with the actual
9565 information, if these two were not specified before.
9567 @return: the export information
9570 assert self.op.mode == constants.INSTANCE_IMPORT
9572 src_node = self.op.src_node
9573 src_path = self.op.src_path
9575 if src_node is None:
9576 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9577 exp_list = self.rpc.call_export_list(locked_nodes)
9579 for node in exp_list:
9580 if exp_list[node].fail_msg:
9582 if src_path in exp_list[node].payload:
9584 self.op.src_node = src_node = node
9585 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9589 raise errors.OpPrereqError("No export found for relative path %s" %
9590 src_path, errors.ECODE_INVAL)
9592 _CheckNodeOnline(self, src_node)
9593 result = self.rpc.call_export_info(src_node, src_path)
9594 result.Raise("No export or invalid export found in dir %s" % src_path)
9596 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9597 if not export_info.has_section(constants.INISECT_EXP):
9598 raise errors.ProgrammerError("Corrupted export config",
9599 errors.ECODE_ENVIRON)
9601 ei_version = export_info.get(constants.INISECT_EXP, "version")
9602 if (int(ei_version) != constants.EXPORT_VERSION):
9603 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9604 (ei_version, constants.EXPORT_VERSION),
9605 errors.ECODE_ENVIRON)
9608 def _ReadExportParams(self, einfo):
9609 """Use export parameters as defaults.
9611 In case the opcode doesn't specify (as in override) some instance
9612 parameters, then try to use them from the export information, if
9616 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9618 if self.op.disk_template is None:
9619 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9620 self.op.disk_template = einfo.get(constants.INISECT_INS,
9622 if self.op.disk_template not in constants.DISK_TEMPLATES:
9623 raise errors.OpPrereqError("Disk template specified in configuration"
9624 " file is not one of the allowed values:"
9626 " ".join(constants.DISK_TEMPLATES),
9629 raise errors.OpPrereqError("No disk template specified and the export"
9630 " is missing the disk_template information",
9633 if not self.op.disks:
9635 # TODO: import the disk iv_name too
9636 for idx in range(constants.MAX_DISKS):
9637 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9638 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9639 disks.append({constants.IDISK_SIZE: disk_sz})
9640 self.op.disks = disks
9641 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9642 raise errors.OpPrereqError("No disk info specified and the export"
9643 " is missing the disk information",
9646 if not self.op.nics:
9648 for idx in range(constants.MAX_NICS):
9649 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9651 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9652 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9659 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9660 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9662 if (self.op.hypervisor is None and
9663 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9664 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9666 if einfo.has_section(constants.INISECT_HYP):
9667 # use the export parameters but do not override the ones
9668 # specified by the user
9669 for name, value in einfo.items(constants.INISECT_HYP):
9670 if name not in self.op.hvparams:
9671 self.op.hvparams[name] = value
9673 if einfo.has_section(constants.INISECT_BEP):
9674 # use the parameters, without overriding
9675 for name, value in einfo.items(constants.INISECT_BEP):
9676 if name not in self.op.beparams:
9677 self.op.beparams[name] = value
9678 # Compatibility for the old "memory" be param
9679 if name == constants.BE_MEMORY:
9680 if constants.BE_MAXMEM not in self.op.beparams:
9681 self.op.beparams[constants.BE_MAXMEM] = value
9682 if constants.BE_MINMEM not in self.op.beparams:
9683 self.op.beparams[constants.BE_MINMEM] = value
9685 # try to read the parameters old style, from the main section
9686 for name in constants.BES_PARAMETERS:
9687 if (name not in self.op.beparams and
9688 einfo.has_option(constants.INISECT_INS, name)):
9689 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9691 if einfo.has_section(constants.INISECT_OSP):
9692 # use the parameters, without overriding
9693 for name, value in einfo.items(constants.INISECT_OSP):
9694 if name not in self.op.osparams:
9695 self.op.osparams[name] = value
9697 def _RevertToDefaults(self, cluster):
9698 """Revert the instance parameters to the default values.
9702 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9703 for name in self.op.hvparams.keys():
9704 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9705 del self.op.hvparams[name]
9707 be_defs = cluster.SimpleFillBE({})
9708 for name in self.op.beparams.keys():
9709 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9710 del self.op.beparams[name]
9712 nic_defs = cluster.SimpleFillNIC({})
9713 for nic in self.op.nics:
9714 for name in constants.NICS_PARAMETERS:
9715 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9718 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9719 for name in self.op.osparams.keys():
9720 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9721 del self.op.osparams[name]
9723 def _CalculateFileStorageDir(self):
9724 """Calculate final instance file storage dir.
9727 # file storage dir calculation/check
9728 self.instance_file_storage_dir = None
9729 if self.op.disk_template in constants.DTS_FILEBASED:
9730 # build the full file storage dir path
9733 if self.op.disk_template == constants.DT_SHARED_FILE:
9734 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9736 get_fsd_fn = self.cfg.GetFileStorageDir
9738 cfg_storagedir = get_fsd_fn()
9739 if not cfg_storagedir:
9740 raise errors.OpPrereqError("Cluster file storage dir not defined",
9742 joinargs.append(cfg_storagedir)
9744 if self.op.file_storage_dir is not None:
9745 joinargs.append(self.op.file_storage_dir)
9747 joinargs.append(self.op.instance_name)
9749 # pylint: disable=W0142
9750 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9752 def CheckPrereq(self): # pylint: disable=R0914
9753 """Check prerequisites.
9756 self._CalculateFileStorageDir()
9758 if self.op.mode == constants.INSTANCE_IMPORT:
9759 export_info = self._ReadExportInfo()
9760 self._ReadExportParams(export_info)
9761 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9763 self._old_instance_name = None
9765 if (not self.cfg.GetVGName() and
9766 self.op.disk_template not in constants.DTS_NOT_LVM):
9767 raise errors.OpPrereqError("Cluster does not support lvm-based"
9768 " instances", errors.ECODE_STATE)
9770 if (self.op.hypervisor is None or
9771 self.op.hypervisor == constants.VALUE_AUTO):
9772 self.op.hypervisor = self.cfg.GetHypervisorType()
9774 cluster = self.cfg.GetClusterInfo()
9775 enabled_hvs = cluster.enabled_hypervisors
9776 if self.op.hypervisor not in enabled_hvs:
9777 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9779 (self.op.hypervisor, ",".join(enabled_hvs)),
9782 # Check tag validity
9783 for tag in self.op.tags:
9784 objects.TaggableObject.ValidateTag(tag)
9786 # check hypervisor parameter syntax (locally)
9787 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9788 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9790 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9791 hv_type.CheckParameterSyntax(filled_hvp)
9792 self.hv_full = filled_hvp
9793 # check that we don't specify global parameters on an instance
9794 _CheckGlobalHvParams(self.op.hvparams)
9796 # fill and remember the beparams dict
9797 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9798 for param, value in self.op.beparams.iteritems():
9799 if value == constants.VALUE_AUTO:
9800 self.op.beparams[param] = default_beparams[param]
9801 objects.UpgradeBeParams(self.op.beparams)
9802 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9803 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9805 # build os parameters
9806 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9808 # now that hvp/bep are in final format, let's reset to defaults,
9810 if self.op.identify_defaults:
9811 self._RevertToDefaults(cluster)
9815 for idx, nic in enumerate(self.op.nics):
9816 nic_mode_req = nic.get(constants.INIC_MODE, None)
9817 nic_mode = nic_mode_req
9818 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9819 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9821 # in routed mode, for the first nic, the default ip is 'auto'
9822 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9823 default_ip_mode = constants.VALUE_AUTO
9825 default_ip_mode = constants.VALUE_NONE
9827 # ip validity checks
9828 ip = nic.get(constants.INIC_IP, default_ip_mode)
9829 if ip is None or ip.lower() == constants.VALUE_NONE:
9831 elif ip.lower() == constants.VALUE_AUTO:
9832 if not self.op.name_check:
9833 raise errors.OpPrereqError("IP address set to auto but name checks"
9834 " have been skipped",
9836 nic_ip = self.hostname1.ip
9838 if not netutils.IPAddress.IsValid(ip):
9839 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9843 # TODO: check the ip address for uniqueness
9844 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9845 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9848 # MAC address verification
9849 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9850 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9851 mac = utils.NormalizeAndValidateMac(mac)
9854 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9855 except errors.ReservationError:
9856 raise errors.OpPrereqError("MAC address %s already in use"
9857 " in cluster" % mac,
9858 errors.ECODE_NOTUNIQUE)
9860 # Build nic parameters
9861 link = nic.get(constants.INIC_LINK, None)
9862 if link == constants.VALUE_AUTO:
9863 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9866 nicparams[constants.NIC_MODE] = nic_mode
9868 nicparams[constants.NIC_LINK] = link
9870 check_params = cluster.SimpleFillNIC(nicparams)
9871 objects.NIC.CheckParameterSyntax(check_params)
9872 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9874 # disk checks/pre-build
9875 default_vg = self.cfg.GetVGName()
9877 for disk in self.op.disks:
9878 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9879 if mode not in constants.DISK_ACCESS_SET:
9880 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9881 mode, errors.ECODE_INVAL)
9882 size = disk.get(constants.IDISK_SIZE, None)
9884 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9887 except (TypeError, ValueError):
9888 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9891 data_vg = disk.get(constants.IDISK_VG, default_vg)
9893 constants.IDISK_SIZE: size,
9894 constants.IDISK_MODE: mode,
9895 constants.IDISK_VG: data_vg,
9897 if constants.IDISK_METAVG in disk:
9898 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9899 if constants.IDISK_ADOPT in disk:
9900 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9901 self.disks.append(new_disk)
9903 if self.op.mode == constants.INSTANCE_IMPORT:
9905 for idx in range(len(self.disks)):
9906 option = "disk%d_dump" % idx
9907 if export_info.has_option(constants.INISECT_INS, option):
9908 # FIXME: are the old os-es, disk sizes, etc. useful?
9909 export_name = export_info.get(constants.INISECT_INS, option)
9910 image = utils.PathJoin(self.op.src_path, export_name)
9911 disk_images.append(image)
9913 disk_images.append(False)
9915 self.src_images = disk_images
9917 if self.op.instance_name == self._old_instance_name:
9918 for idx, nic in enumerate(self.nics):
9919 if nic.mac == constants.VALUE_AUTO:
9920 nic_mac_ini = "nic%d_mac" % idx
9921 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9923 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9925 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9926 if self.op.ip_check:
9927 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9928 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9929 (self.check_ip, self.op.instance_name),
9930 errors.ECODE_NOTUNIQUE)
9932 #### mac address generation
9933 # By generating here the mac address both the allocator and the hooks get
9934 # the real final mac address rather than the 'auto' or 'generate' value.
9935 # There is a race condition between the generation and the instance object
9936 # creation, which means that we know the mac is valid now, but we're not
9937 # sure it will be when we actually add the instance. If things go bad
9938 # adding the instance will abort because of a duplicate mac, and the
9939 # creation job will fail.
9940 for nic in self.nics:
9941 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9942 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9946 if self.op.iallocator is not None:
9947 self._RunAllocator()
9949 # Release all unneeded node locks
9950 _ReleaseLocks(self, locking.LEVEL_NODE,
9951 keep=filter(None, [self.op.pnode, self.op.snode,
9953 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9954 keep=filter(None, [self.op.pnode, self.op.snode,
9957 #### node related checks
9959 # check primary node
9960 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9961 assert self.pnode is not None, \
9962 "Cannot retrieve locked node %s" % self.op.pnode
9964 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9965 pnode.name, errors.ECODE_STATE)
9967 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9968 pnode.name, errors.ECODE_STATE)
9969 if not pnode.vm_capable:
9970 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9971 " '%s'" % pnode.name, errors.ECODE_STATE)
9973 self.secondaries = []
9975 # mirror node verification
9976 if self.op.disk_template in constants.DTS_INT_MIRROR:
9977 if self.op.snode == pnode.name:
9978 raise errors.OpPrereqError("The secondary node cannot be the"
9979 " primary node", errors.ECODE_INVAL)
9980 _CheckNodeOnline(self, self.op.snode)
9981 _CheckNodeNotDrained(self, self.op.snode)
9982 _CheckNodeVmCapable(self, self.op.snode)
9983 self.secondaries.append(self.op.snode)
9985 snode = self.cfg.GetNodeInfo(self.op.snode)
9986 if pnode.group != snode.group:
9987 self.LogWarning("The primary and secondary nodes are in two"
9988 " different node groups; the disk parameters"
9989 " from the first disk's node group will be"
9992 nodenames = [pnode.name] + self.secondaries
9994 # Verify instance specs
9995 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9997 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9998 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9999 constants.ISPEC_DISK_COUNT: len(self.disks),
10000 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10001 constants.ISPEC_NIC_COUNT: len(self.nics),
10002 constants.ISPEC_SPINDLE_USE: spindle_use,
10005 group_info = self.cfg.GetNodeGroup(pnode.group)
10006 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10007 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10008 if not self.op.ignore_ipolicy and res:
10009 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10010 " policy: %s") % (pnode.group,
10011 utils.CommaJoin(res)),
10012 errors.ECODE_INVAL)
10014 if not self.adopt_disks:
10015 if self.op.disk_template == constants.DT_RBD:
10016 # _CheckRADOSFreeSpace() is just a placeholder.
10017 # Any function that checks prerequisites can be placed here.
10018 # Check if there is enough space on the RADOS cluster.
10019 _CheckRADOSFreeSpace()
10021 # Check lv size requirements, if not adopting
10022 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10023 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10025 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10026 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10027 disk[constants.IDISK_ADOPT])
10028 for disk in self.disks])
10029 if len(all_lvs) != len(self.disks):
10030 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10031 errors.ECODE_INVAL)
10032 for lv_name in all_lvs:
10034 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10035 # to ReserveLV uses the same syntax
10036 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10037 except errors.ReservationError:
10038 raise errors.OpPrereqError("LV named %s used by another instance" %
10039 lv_name, errors.ECODE_NOTUNIQUE)
10041 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10042 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10044 node_lvs = self.rpc.call_lv_list([pnode.name],
10045 vg_names.payload.keys())[pnode.name]
10046 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10047 node_lvs = node_lvs.payload
10049 delta = all_lvs.difference(node_lvs.keys())
10051 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10052 utils.CommaJoin(delta),
10053 errors.ECODE_INVAL)
10054 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10056 raise errors.OpPrereqError("Online logical volumes found, cannot"
10057 " adopt: %s" % utils.CommaJoin(online_lvs),
10058 errors.ECODE_STATE)
10059 # update the size of disk based on what is found
10060 for dsk in self.disks:
10061 dsk[constants.IDISK_SIZE] = \
10062 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10063 dsk[constants.IDISK_ADOPT])][0]))
10065 elif self.op.disk_template == constants.DT_BLOCK:
10066 # Normalize and de-duplicate device paths
10067 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10068 for disk in self.disks])
10069 if len(all_disks) != len(self.disks):
10070 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10071 errors.ECODE_INVAL)
10072 baddisks = [d for d in all_disks
10073 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10075 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10076 " cannot be adopted" %
10077 (", ".join(baddisks),
10078 constants.ADOPTABLE_BLOCKDEV_ROOT),
10079 errors.ECODE_INVAL)
10081 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10082 list(all_disks))[pnode.name]
10083 node_disks.Raise("Cannot get block device information from node %s" %
10085 node_disks = node_disks.payload
10086 delta = all_disks.difference(node_disks.keys())
10088 raise errors.OpPrereqError("Missing block device(s): %s" %
10089 utils.CommaJoin(delta),
10090 errors.ECODE_INVAL)
10091 for dsk in self.disks:
10092 dsk[constants.IDISK_SIZE] = \
10093 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10095 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10097 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10098 # check OS parameters (remotely)
10099 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10101 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10103 # memory check on primary node
10104 #TODO(dynmem): use MINMEM for checking
10106 _CheckNodeFreeMemory(self, self.pnode.name,
10107 "creating instance %s" % self.op.instance_name,
10108 self.be_full[constants.BE_MAXMEM],
10109 self.op.hypervisor)
10111 self.dry_run_result = list(nodenames)
10113 def Exec(self, feedback_fn):
10114 """Create and add the instance to the cluster.
10117 instance = self.op.instance_name
10118 pnode_name = self.pnode.name
10120 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10121 self.owned_locks(locking.LEVEL_NODE)), \
10122 "Node locks differ from node resource locks"
10124 ht_kind = self.op.hypervisor
10125 if ht_kind in constants.HTS_REQ_PORT:
10126 network_port = self.cfg.AllocatePort()
10128 network_port = None
10130 # This is ugly but we got a chicken-egg problem here
10131 # We can only take the group disk parameters, as the instance
10132 # has no disks yet (we are generating them right here).
10133 node = self.cfg.GetNodeInfo(pnode_name)
10134 nodegroup = self.cfg.GetNodeGroup(node.group)
10135 disks = _GenerateDiskTemplate(self,
10136 self.op.disk_template,
10137 instance, pnode_name,
10140 self.instance_file_storage_dir,
10141 self.op.file_driver,
10144 self.cfg.GetGroupDiskParams(nodegroup))
10146 iobj = objects.Instance(name=instance, os=self.op.os_type,
10147 primary_node=pnode_name,
10148 nics=self.nics, disks=disks,
10149 disk_template=self.op.disk_template,
10150 admin_state=constants.ADMINST_DOWN,
10151 network_port=network_port,
10152 beparams=self.op.beparams,
10153 hvparams=self.op.hvparams,
10154 hypervisor=self.op.hypervisor,
10155 osparams=self.op.osparams,
10159 for tag in self.op.tags:
10162 if self.adopt_disks:
10163 if self.op.disk_template == constants.DT_PLAIN:
10164 # rename LVs to the newly-generated names; we need to construct
10165 # 'fake' LV disks with the old data, plus the new unique_id
10166 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10168 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10169 rename_to.append(t_dsk.logical_id)
10170 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10171 self.cfg.SetDiskID(t_dsk, pnode_name)
10172 result = self.rpc.call_blockdev_rename(pnode_name,
10173 zip(tmp_disks, rename_to))
10174 result.Raise("Failed to rename adoped LVs")
10176 feedback_fn("* creating instance disks...")
10178 _CreateDisks(self, iobj)
10179 except errors.OpExecError:
10180 self.LogWarning("Device creation failed, reverting...")
10182 _RemoveDisks(self, iobj)
10184 self.cfg.ReleaseDRBDMinors(instance)
10187 feedback_fn("adding instance %s to cluster config" % instance)
10189 self.cfg.AddInstance(iobj, self.proc.GetECId())
10191 # Declare that we don't want to remove the instance lock anymore, as we've
10192 # added the instance to the config
10193 del self.remove_locks[locking.LEVEL_INSTANCE]
10195 if self.op.mode == constants.INSTANCE_IMPORT:
10196 # Release unused nodes
10197 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10199 # Release all nodes
10200 _ReleaseLocks(self, locking.LEVEL_NODE)
10203 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10204 feedback_fn("* wiping instance disks...")
10206 _WipeDisks(self, iobj)
10207 except errors.OpExecError, err:
10208 logging.exception("Wiping disks failed")
10209 self.LogWarning("Wiping instance disks failed (%s)", err)
10213 # Something is already wrong with the disks, don't do anything else
10215 elif self.op.wait_for_sync:
10216 disk_abort = not _WaitForSync(self, iobj)
10217 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10218 # make sure the disks are not degraded (still sync-ing is ok)
10219 feedback_fn("* checking mirrors status")
10220 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10225 _RemoveDisks(self, iobj)
10226 self.cfg.RemoveInstance(iobj.name)
10227 # Make sure the instance lock gets removed
10228 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10229 raise errors.OpExecError("There are some degraded disks for"
10232 # Release all node resource locks
10233 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10235 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10236 # we need to set the disks ID to the primary node, since the
10237 # preceding code might or might have not done it, depending on
10238 # disk template and other options
10239 for disk in iobj.disks:
10240 self.cfg.SetDiskID(disk, pnode_name)
10241 if self.op.mode == constants.INSTANCE_CREATE:
10242 if not self.op.no_install:
10243 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10244 not self.op.wait_for_sync)
10246 feedback_fn("* pausing disk sync to install instance OS")
10247 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10250 for idx, success in enumerate(result.payload):
10252 logging.warn("pause-sync of instance %s for disk %d failed",
10255 feedback_fn("* running the instance OS create scripts...")
10256 # FIXME: pass debug option from opcode to backend
10258 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10259 self.op.debug_level)
10261 feedback_fn("* resuming disk sync")
10262 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10265 for idx, success in enumerate(result.payload):
10267 logging.warn("resume-sync of instance %s for disk %d failed",
10270 os_add_result.Raise("Could not add os for instance %s"
10271 " on node %s" % (instance, pnode_name))
10274 if self.op.mode == constants.INSTANCE_IMPORT:
10275 feedback_fn("* running the instance OS import scripts...")
10279 for idx, image in enumerate(self.src_images):
10283 # FIXME: pass debug option from opcode to backend
10284 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10285 constants.IEIO_FILE, (image, ),
10286 constants.IEIO_SCRIPT,
10287 (iobj.disks[idx], idx),
10289 transfers.append(dt)
10292 masterd.instance.TransferInstanceData(self, feedback_fn,
10293 self.op.src_node, pnode_name,
10294 self.pnode.secondary_ip,
10296 if not compat.all(import_result):
10297 self.LogWarning("Some disks for instance %s on node %s were not"
10298 " imported successfully" % (instance, pnode_name))
10300 rename_from = self._old_instance_name
10302 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10303 feedback_fn("* preparing remote import...")
10304 # The source cluster will stop the instance before attempting to make
10305 # a connection. In some cases stopping an instance can take a long
10306 # time, hence the shutdown timeout is added to the connection
10308 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10309 self.op.source_shutdown_timeout)
10310 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10312 assert iobj.primary_node == self.pnode.name
10314 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10315 self.source_x509_ca,
10316 self._cds, timeouts)
10317 if not compat.all(disk_results):
10318 # TODO: Should the instance still be started, even if some disks
10319 # failed to import (valid for local imports, too)?
10320 self.LogWarning("Some disks for instance %s on node %s were not"
10321 " imported successfully" % (instance, pnode_name))
10323 rename_from = self.source_instance_name
10326 # also checked in the prereq part
10327 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10330 # Run rename script on newly imported instance
10331 assert iobj.name == instance
10332 feedback_fn("Running rename script for %s" % instance)
10333 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10335 self.op.debug_level)
10336 if result.fail_msg:
10337 self.LogWarning("Failed to run rename script for %s on node"
10338 " %s: %s" % (instance, pnode_name, result.fail_msg))
10340 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10343 iobj.admin_state = constants.ADMINST_UP
10344 self.cfg.Update(iobj, feedback_fn)
10345 logging.info("Starting instance %s on node %s", instance, pnode_name)
10346 feedback_fn("* starting instance...")
10347 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10349 result.Raise("Could not start instance")
10351 return list(iobj.all_nodes)
10354 def _CheckRADOSFreeSpace():
10355 """Compute disk size requirements inside the RADOS cluster.
10358 # For the RADOS cluster we assume there is always enough space.
10362 class LUInstanceConsole(NoHooksLU):
10363 """Connect to an instance's console.
10365 This is somewhat special in that it returns the command line that
10366 you need to run on the master node in order to connect to the
10372 def ExpandNames(self):
10373 self.share_locks = _ShareAll()
10374 self._ExpandAndLockInstance()
10376 def CheckPrereq(self):
10377 """Check prerequisites.
10379 This checks that the instance is in the cluster.
10382 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10383 assert self.instance is not None, \
10384 "Cannot retrieve locked instance %s" % self.op.instance_name
10385 _CheckNodeOnline(self, self.instance.primary_node)
10387 def Exec(self, feedback_fn):
10388 """Connect to the console of an instance
10391 instance = self.instance
10392 node = instance.primary_node
10394 node_insts = self.rpc.call_instance_list([node],
10395 [instance.hypervisor])[node]
10396 node_insts.Raise("Can't get node information from %s" % node)
10398 if instance.name not in node_insts.payload:
10399 if instance.admin_state == constants.ADMINST_UP:
10400 state = constants.INSTST_ERRORDOWN
10401 elif instance.admin_state == constants.ADMINST_DOWN:
10402 state = constants.INSTST_ADMINDOWN
10404 state = constants.INSTST_ADMINOFFLINE
10405 raise errors.OpExecError("Instance %s is not running (state %s)" %
10406 (instance.name, state))
10408 logging.debug("Connecting to console of %s on %s", instance.name, node)
10410 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10413 def _GetInstanceConsole(cluster, instance):
10414 """Returns console information for an instance.
10416 @type cluster: L{objects.Cluster}
10417 @type instance: L{objects.Instance}
10421 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10422 # beparams and hvparams are passed separately, to avoid editing the
10423 # instance and then saving the defaults in the instance itself.
10424 hvparams = cluster.FillHV(instance)
10425 beparams = cluster.FillBE(instance)
10426 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10428 assert console.instance == instance.name
10429 assert console.Validate()
10431 return console.ToDict()
10434 class LUInstanceReplaceDisks(LogicalUnit):
10435 """Replace the disks of an instance.
10438 HPATH = "mirrors-replace"
10439 HTYPE = constants.HTYPE_INSTANCE
10442 def CheckArguments(self):
10443 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10444 self.op.iallocator)
10446 def ExpandNames(self):
10447 self._ExpandAndLockInstance()
10449 assert locking.LEVEL_NODE not in self.needed_locks
10450 assert locking.LEVEL_NODE_RES not in self.needed_locks
10451 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10453 assert self.op.iallocator is None or self.op.remote_node is None, \
10454 "Conflicting options"
10456 if self.op.remote_node is not None:
10457 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10459 # Warning: do not remove the locking of the new secondary here
10460 # unless DRBD8.AddChildren is changed to work in parallel;
10461 # currently it doesn't since parallel invocations of
10462 # FindUnusedMinor will conflict
10463 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10464 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10466 self.needed_locks[locking.LEVEL_NODE] = []
10467 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10469 if self.op.iallocator is not None:
10470 # iallocator will select a new node in the same group
10471 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10473 self.needed_locks[locking.LEVEL_NODE_RES] = []
10475 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10476 self.op.iallocator, self.op.remote_node,
10477 self.op.disks, False, self.op.early_release,
10478 self.op.ignore_ipolicy)
10480 self.tasklets = [self.replacer]
10482 def DeclareLocks(self, level):
10483 if level == locking.LEVEL_NODEGROUP:
10484 assert self.op.remote_node is None
10485 assert self.op.iallocator is not None
10486 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10488 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10489 # Lock all groups used by instance optimistically; this requires going
10490 # via the node before it's locked, requiring verification later on
10491 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10492 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10494 elif level == locking.LEVEL_NODE:
10495 if self.op.iallocator is not None:
10496 assert self.op.remote_node is None
10497 assert not self.needed_locks[locking.LEVEL_NODE]
10499 # Lock member nodes of all locked groups
10500 self.needed_locks[locking.LEVEL_NODE] = \
10502 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10503 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10505 self._LockInstancesNodes()
10506 elif level == locking.LEVEL_NODE_RES:
10508 self.needed_locks[locking.LEVEL_NODE_RES] = \
10509 self.needed_locks[locking.LEVEL_NODE]
10511 def BuildHooksEnv(self):
10512 """Build hooks env.
10514 This runs on the master, the primary and all the secondaries.
10517 instance = self.replacer.instance
10519 "MODE": self.op.mode,
10520 "NEW_SECONDARY": self.op.remote_node,
10521 "OLD_SECONDARY": instance.secondary_nodes[0],
10523 env.update(_BuildInstanceHookEnvByObject(self, instance))
10526 def BuildHooksNodes(self):
10527 """Build hooks nodes.
10530 instance = self.replacer.instance
10532 self.cfg.GetMasterNode(),
10533 instance.primary_node,
10535 if self.op.remote_node is not None:
10536 nl.append(self.op.remote_node)
10539 def CheckPrereq(self):
10540 """Check prerequisites.
10543 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10544 self.op.iallocator is None)
10546 # Verify if node group locks are still correct
10547 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10549 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10551 return LogicalUnit.CheckPrereq(self)
10554 class TLReplaceDisks(Tasklet):
10555 """Replaces disks for an instance.
10557 Note: Locking is not within the scope of this class.
10560 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10561 disks, delay_iallocator, early_release, ignore_ipolicy):
10562 """Initializes this class.
10565 Tasklet.__init__(self, lu)
10568 self.instance_name = instance_name
10570 self.iallocator_name = iallocator_name
10571 self.remote_node = remote_node
10573 self.delay_iallocator = delay_iallocator
10574 self.early_release = early_release
10575 self.ignore_ipolicy = ignore_ipolicy
10578 self.instance = None
10579 self.new_node = None
10580 self.target_node = None
10581 self.other_node = None
10582 self.remote_node_info = None
10583 self.node_secondary_ip = None
10586 def CheckArguments(mode, remote_node, ialloc):
10587 """Helper function for users of this class.
10590 # check for valid parameter combination
10591 if mode == constants.REPLACE_DISK_CHG:
10592 if remote_node is None and ialloc is None:
10593 raise errors.OpPrereqError("When changing the secondary either an"
10594 " iallocator script must be used or the"
10595 " new node given", errors.ECODE_INVAL)
10597 if remote_node is not None and ialloc is not None:
10598 raise errors.OpPrereqError("Give either the iallocator or the new"
10599 " secondary, not both", errors.ECODE_INVAL)
10601 elif remote_node is not None or ialloc is not None:
10602 # Not replacing the secondary
10603 raise errors.OpPrereqError("The iallocator and new node options can"
10604 " only be used when changing the"
10605 " secondary node", errors.ECODE_INVAL)
10608 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10609 """Compute a new secondary node using an IAllocator.
10612 req = iallocator.IAReqRelocate(name=instance_name,
10613 relocate_from=list(relocate_from))
10614 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10616 ial.Run(iallocator_name)
10618 if not ial.success:
10619 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10620 " %s" % (iallocator_name, ial.info),
10621 errors.ECODE_NORES)
10623 remote_node_name = ial.result[0]
10625 lu.LogInfo("Selected new secondary for instance '%s': %s",
10626 instance_name, remote_node_name)
10628 return remote_node_name
10630 def _FindFaultyDisks(self, node_name):
10631 """Wrapper for L{_FindFaultyInstanceDisks}.
10634 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10637 def _CheckDisksActivated(self, instance):
10638 """Checks if the instance disks are activated.
10640 @param instance: The instance to check disks
10641 @return: True if they are activated, False otherwise
10644 nodes = instance.all_nodes
10646 for idx, dev in enumerate(instance.disks):
10648 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10649 self.cfg.SetDiskID(dev, node)
10651 result = _BlockdevFind(self, node, dev, instance)
10655 elif result.fail_msg or not result.payload:
10660 def CheckPrereq(self):
10661 """Check prerequisites.
10663 This checks that the instance is in the cluster.
10666 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10667 assert instance is not None, \
10668 "Cannot retrieve locked instance %s" % self.instance_name
10670 if instance.disk_template != constants.DT_DRBD8:
10671 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10672 " instances", errors.ECODE_INVAL)
10674 if len(instance.secondary_nodes) != 1:
10675 raise errors.OpPrereqError("The instance has a strange layout,"
10676 " expected one secondary but found %d" %
10677 len(instance.secondary_nodes),
10678 errors.ECODE_FAULT)
10680 if not self.delay_iallocator:
10681 self._CheckPrereq2()
10683 def _CheckPrereq2(self):
10684 """Check prerequisites, second part.
10686 This function should always be part of CheckPrereq. It was separated and is
10687 now called from Exec because during node evacuation iallocator was only
10688 called with an unmodified cluster model, not taking planned changes into
10692 instance = self.instance
10693 secondary_node = instance.secondary_nodes[0]
10695 if self.iallocator_name is None:
10696 remote_node = self.remote_node
10698 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10699 instance.name, instance.secondary_nodes)
10701 if remote_node is None:
10702 self.remote_node_info = None
10704 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10705 "Remote node '%s' is not locked" % remote_node
10707 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10708 assert self.remote_node_info is not None, \
10709 "Cannot retrieve locked node %s" % remote_node
10711 if remote_node == self.instance.primary_node:
10712 raise errors.OpPrereqError("The specified node is the primary node of"
10713 " the instance", errors.ECODE_INVAL)
10715 if remote_node == secondary_node:
10716 raise errors.OpPrereqError("The specified node is already the"
10717 " secondary node of the instance",
10718 errors.ECODE_INVAL)
10720 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10721 constants.REPLACE_DISK_CHG):
10722 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10723 errors.ECODE_INVAL)
10725 if self.mode == constants.REPLACE_DISK_AUTO:
10726 if not self._CheckDisksActivated(instance):
10727 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10728 " first" % self.instance_name,
10729 errors.ECODE_STATE)
10730 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10731 faulty_secondary = self._FindFaultyDisks(secondary_node)
10733 if faulty_primary and faulty_secondary:
10734 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10735 " one node and can not be repaired"
10736 " automatically" % self.instance_name,
10737 errors.ECODE_STATE)
10740 self.disks = faulty_primary
10741 self.target_node = instance.primary_node
10742 self.other_node = secondary_node
10743 check_nodes = [self.target_node, self.other_node]
10744 elif faulty_secondary:
10745 self.disks = faulty_secondary
10746 self.target_node = secondary_node
10747 self.other_node = instance.primary_node
10748 check_nodes = [self.target_node, self.other_node]
10754 # Non-automatic modes
10755 if self.mode == constants.REPLACE_DISK_PRI:
10756 self.target_node = instance.primary_node
10757 self.other_node = secondary_node
10758 check_nodes = [self.target_node, self.other_node]
10760 elif self.mode == constants.REPLACE_DISK_SEC:
10761 self.target_node = secondary_node
10762 self.other_node = instance.primary_node
10763 check_nodes = [self.target_node, self.other_node]
10765 elif self.mode == constants.REPLACE_DISK_CHG:
10766 self.new_node = remote_node
10767 self.other_node = instance.primary_node
10768 self.target_node = secondary_node
10769 check_nodes = [self.new_node, self.other_node]
10771 _CheckNodeNotDrained(self.lu, remote_node)
10772 _CheckNodeVmCapable(self.lu, remote_node)
10774 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10775 assert old_node_info is not None
10776 if old_node_info.offline and not self.early_release:
10777 # doesn't make sense to delay the release
10778 self.early_release = True
10779 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10780 " early-release mode", secondary_node)
10783 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10786 # If not specified all disks should be replaced
10788 self.disks = range(len(self.instance.disks))
10790 # TODO: This is ugly, but right now we can't distinguish between internal
10791 # submitted opcode and external one. We should fix that.
10792 if self.remote_node_info:
10793 # We change the node, lets verify it still meets instance policy
10794 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10795 cluster = self.cfg.GetClusterInfo()
10796 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
10798 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10799 ignore=self.ignore_ipolicy)
10801 for node in check_nodes:
10802 _CheckNodeOnline(self.lu, node)
10804 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10807 if node_name is not None)
10809 # Release unneeded node and node resource locks
10810 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10811 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10813 # Release any owned node group
10814 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10815 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10817 # Check whether disks are valid
10818 for disk_idx in self.disks:
10819 instance.FindDisk(disk_idx)
10821 # Get secondary node IP addresses
10822 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10823 in self.cfg.GetMultiNodeInfo(touched_nodes))
10825 def Exec(self, feedback_fn):
10826 """Execute disk replacement.
10828 This dispatches the disk replacement to the appropriate handler.
10831 if self.delay_iallocator:
10832 self._CheckPrereq2()
10835 # Verify owned locks before starting operation
10836 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10837 assert set(owned_nodes) == set(self.node_secondary_ip), \
10838 ("Incorrect node locks, owning %s, expected %s" %
10839 (owned_nodes, self.node_secondary_ip.keys()))
10840 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10841 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10843 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10844 assert list(owned_instances) == [self.instance_name], \
10845 "Instance '%s' not locked" % self.instance_name
10847 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10848 "Should not own any node group lock at this point"
10851 feedback_fn("No disks need replacement")
10854 feedback_fn("Replacing disk(s) %s for %s" %
10855 (utils.CommaJoin(self.disks), self.instance.name))
10857 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10859 # Activate the instance disks if we're replacing them on a down instance
10861 _StartInstanceDisks(self.lu, self.instance, True)
10864 # Should we replace the secondary node?
10865 if self.new_node is not None:
10866 fn = self._ExecDrbd8Secondary
10868 fn = self._ExecDrbd8DiskOnly
10870 result = fn(feedback_fn)
10872 # Deactivate the instance disks if we're replacing them on a
10875 _SafeShutdownInstanceDisks(self.lu, self.instance)
10877 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10880 # Verify owned locks
10881 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10882 nodes = frozenset(self.node_secondary_ip)
10883 assert ((self.early_release and not owned_nodes) or
10884 (not self.early_release and not (set(owned_nodes) - nodes))), \
10885 ("Not owning the correct locks, early_release=%s, owned=%r,"
10886 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10890 def _CheckVolumeGroup(self, nodes):
10891 self.lu.LogInfo("Checking volume groups")
10893 vgname = self.cfg.GetVGName()
10895 # Make sure volume group exists on all involved nodes
10896 results = self.rpc.call_vg_list(nodes)
10898 raise errors.OpExecError("Can't list volume groups on the nodes")
10901 res = results[node]
10902 res.Raise("Error checking node %s" % node)
10903 if vgname not in res.payload:
10904 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10907 def _CheckDisksExistence(self, nodes):
10908 # Check disk existence
10909 for idx, dev in enumerate(self.instance.disks):
10910 if idx not in self.disks:
10914 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10915 self.cfg.SetDiskID(dev, node)
10917 result = _BlockdevFind(self, node, dev, self.instance)
10919 msg = result.fail_msg
10920 if msg or not result.payload:
10922 msg = "disk not found"
10923 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10926 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10927 for idx, dev in enumerate(self.instance.disks):
10928 if idx not in self.disks:
10931 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10934 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10935 on_primary, ldisk=ldisk):
10936 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10937 " replace disks for instance %s" %
10938 (node_name, self.instance.name))
10940 def _CreateNewStorage(self, node_name):
10941 """Create new storage on the primary or secondary node.
10943 This is only used for same-node replaces, not for changing the
10944 secondary node, hence we don't want to modify the existing disk.
10949 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10950 for idx, dev in enumerate(disks):
10951 if idx not in self.disks:
10954 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10956 self.cfg.SetDiskID(dev, node_name)
10958 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10959 names = _GenerateUniqueNames(self.lu, lv_names)
10961 (data_disk, meta_disk) = dev.children
10962 vg_data = data_disk.logical_id[0]
10963 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10964 logical_id=(vg_data, names[0]),
10965 params=data_disk.params)
10966 vg_meta = meta_disk.logical_id[0]
10967 lv_meta = objects.Disk(dev_type=constants.LD_LV,
10968 size=constants.DRBD_META_SIZE,
10969 logical_id=(vg_meta, names[1]),
10970 params=meta_disk.params)
10972 new_lvs = [lv_data, lv_meta]
10973 old_lvs = [child.Copy() for child in dev.children]
10974 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10976 # we pass force_create=True to force the LVM creation
10977 for new_lv in new_lvs:
10978 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10979 _GetInstanceInfoText(self.instance), False)
10983 def _CheckDevices(self, node_name, iv_names):
10984 for name, (dev, _, _) in iv_names.iteritems():
10985 self.cfg.SetDiskID(dev, node_name)
10987 result = _BlockdevFind(self, node_name, dev, self.instance)
10989 msg = result.fail_msg
10990 if msg or not result.payload:
10992 msg = "disk not found"
10993 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10996 if result.payload.is_degraded:
10997 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10999 def _RemoveOldStorage(self, node_name, iv_names):
11000 for name, (_, old_lvs, _) in iv_names.iteritems():
11001 self.lu.LogInfo("Remove logical volumes for %s" % name)
11004 self.cfg.SetDiskID(lv, node_name)
11006 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11008 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11009 hint="remove unused LVs manually")
11011 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11012 """Replace a disk on the primary or secondary for DRBD 8.
11014 The algorithm for replace is quite complicated:
11016 1. for each disk to be replaced:
11018 1. create new LVs on the target node with unique names
11019 1. detach old LVs from the drbd device
11020 1. rename old LVs to name_replaced.<time_t>
11021 1. rename new LVs to old LVs
11022 1. attach the new LVs (with the old names now) to the drbd device
11024 1. wait for sync across all devices
11026 1. for each modified disk:
11028 1. remove old LVs (which have the name name_replaces.<time_t>)
11030 Failures are not very well handled.
11035 # Step: check device activation
11036 self.lu.LogStep(1, steps_total, "Check device existence")
11037 self._CheckDisksExistence([self.other_node, self.target_node])
11038 self._CheckVolumeGroup([self.target_node, self.other_node])
11040 # Step: check other node consistency
11041 self.lu.LogStep(2, steps_total, "Check peer consistency")
11042 self._CheckDisksConsistency(self.other_node,
11043 self.other_node == self.instance.primary_node,
11046 # Step: create new storage
11047 self.lu.LogStep(3, steps_total, "Allocate new storage")
11048 iv_names = self._CreateNewStorage(self.target_node)
11050 # Step: for each lv, detach+rename*2+attach
11051 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11052 for dev, old_lvs, new_lvs in iv_names.itervalues():
11053 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11055 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11057 result.Raise("Can't detach drbd from local storage on node"
11058 " %s for device %s" % (self.target_node, dev.iv_name))
11060 #cfg.Update(instance)
11062 # ok, we created the new LVs, so now we know we have the needed
11063 # storage; as such, we proceed on the target node to rename
11064 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11065 # using the assumption that logical_id == physical_id (which in
11066 # turn is the unique_id on that node)
11068 # FIXME(iustin): use a better name for the replaced LVs
11069 temp_suffix = int(time.time())
11070 ren_fn = lambda d, suff: (d.physical_id[0],
11071 d.physical_id[1] + "_replaced-%s" % suff)
11073 # Build the rename list based on what LVs exist on the node
11074 rename_old_to_new = []
11075 for to_ren in old_lvs:
11076 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11077 if not result.fail_msg and result.payload:
11079 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11081 self.lu.LogInfo("Renaming the old LVs on the target node")
11082 result = self.rpc.call_blockdev_rename(self.target_node,
11084 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11086 # Now we rename the new LVs to the old LVs
11087 self.lu.LogInfo("Renaming the new LVs on the target node")
11088 rename_new_to_old = [(new, old.physical_id)
11089 for old, new in zip(old_lvs, new_lvs)]
11090 result = self.rpc.call_blockdev_rename(self.target_node,
11092 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11094 # Intermediate steps of in memory modifications
11095 for old, new in zip(old_lvs, new_lvs):
11096 new.logical_id = old.logical_id
11097 self.cfg.SetDiskID(new, self.target_node)
11099 # We need to modify old_lvs so that removal later removes the
11100 # right LVs, not the newly added ones; note that old_lvs is a
11102 for disk in old_lvs:
11103 disk.logical_id = ren_fn(disk, temp_suffix)
11104 self.cfg.SetDiskID(disk, self.target_node)
11106 # Now that the new lvs have the old name, we can add them to the device
11107 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11108 result = self.rpc.call_blockdev_addchildren(self.target_node,
11109 (dev, self.instance), new_lvs)
11110 msg = result.fail_msg
11112 for new_lv in new_lvs:
11113 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11116 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11117 hint=("cleanup manually the unused logical"
11119 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11121 cstep = itertools.count(5)
11123 if self.early_release:
11124 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11125 self._RemoveOldStorage(self.target_node, iv_names)
11126 # TODO: Check if releasing locks early still makes sense
11127 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11129 # Release all resource locks except those used by the instance
11130 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11131 keep=self.node_secondary_ip.keys())
11133 # Release all node locks while waiting for sync
11134 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11136 # TODO: Can the instance lock be downgraded here? Take the optional disk
11137 # shutdown in the caller into consideration.
11140 # This can fail as the old devices are degraded and _WaitForSync
11141 # does a combined result over all disks, so we don't check its return value
11142 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11143 _WaitForSync(self.lu, self.instance)
11145 # Check all devices manually
11146 self._CheckDevices(self.instance.primary_node, iv_names)
11148 # Step: remove old storage
11149 if not self.early_release:
11150 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11151 self._RemoveOldStorage(self.target_node, iv_names)
11153 def _ExecDrbd8Secondary(self, feedback_fn):
11154 """Replace the secondary node for DRBD 8.
11156 The algorithm for replace is quite complicated:
11157 - for all disks of the instance:
11158 - create new LVs on the new node with same names
11159 - shutdown the drbd device on the old secondary
11160 - disconnect the drbd network on the primary
11161 - create the drbd device on the new secondary
11162 - network attach the drbd on the primary, using an artifice:
11163 the drbd code for Attach() will connect to the network if it
11164 finds a device which is connected to the good local disks but
11165 not network enabled
11166 - wait for sync across all devices
11167 - remove all disks from the old secondary
11169 Failures are not very well handled.
11174 pnode = self.instance.primary_node
11176 # Step: check device activation
11177 self.lu.LogStep(1, steps_total, "Check device existence")
11178 self._CheckDisksExistence([self.instance.primary_node])
11179 self._CheckVolumeGroup([self.instance.primary_node])
11181 # Step: check other node consistency
11182 self.lu.LogStep(2, steps_total, "Check peer consistency")
11183 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11185 # Step: create new storage
11186 self.lu.LogStep(3, steps_total, "Allocate new storage")
11187 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11188 for idx, dev in enumerate(disks):
11189 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11190 (self.new_node, idx))
11191 # we pass force_create=True to force LVM creation
11192 for new_lv in dev.children:
11193 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11194 True, _GetInstanceInfoText(self.instance), False)
11196 # Step 4: dbrd minors and drbd setups changes
11197 # after this, we must manually remove the drbd minors on both the
11198 # error and the success paths
11199 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11200 minors = self.cfg.AllocateDRBDMinor([self.new_node
11201 for dev in self.instance.disks],
11202 self.instance.name)
11203 logging.debug("Allocated minors %r", minors)
11206 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11207 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11208 (self.new_node, idx))
11209 # create new devices on new_node; note that we create two IDs:
11210 # one without port, so the drbd will be activated without
11211 # networking information on the new node at this stage, and one
11212 # with network, for the latter activation in step 4
11213 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11214 if self.instance.primary_node == o_node1:
11217 assert self.instance.primary_node == o_node2, "Three-node instance?"
11220 new_alone_id = (self.instance.primary_node, self.new_node, None,
11221 p_minor, new_minor, o_secret)
11222 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11223 p_minor, new_minor, o_secret)
11225 iv_names[idx] = (dev, dev.children, new_net_id)
11226 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11228 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11229 logical_id=new_alone_id,
11230 children=dev.children,
11233 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11236 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11238 _GetInstanceInfoText(self.instance), False)
11239 except errors.GenericError:
11240 self.cfg.ReleaseDRBDMinors(self.instance.name)
11243 # We have new devices, shutdown the drbd on the old secondary
11244 for idx, dev in enumerate(self.instance.disks):
11245 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11246 self.cfg.SetDiskID(dev, self.target_node)
11247 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11248 (dev, self.instance)).fail_msg
11250 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11251 "node: %s" % (idx, msg),
11252 hint=("Please cleanup this device manually as"
11253 " soon as possible"))
11255 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11256 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11257 self.instance.disks)[pnode]
11259 msg = result.fail_msg
11261 # detaches didn't succeed (unlikely)
11262 self.cfg.ReleaseDRBDMinors(self.instance.name)
11263 raise errors.OpExecError("Can't detach the disks from the network on"
11264 " old node: %s" % (msg,))
11266 # if we managed to detach at least one, we update all the disks of
11267 # the instance to point to the new secondary
11268 self.lu.LogInfo("Updating instance configuration")
11269 for dev, _, new_logical_id in iv_names.itervalues():
11270 dev.logical_id = new_logical_id
11271 self.cfg.SetDiskID(dev, self.instance.primary_node)
11273 self.cfg.Update(self.instance, feedback_fn)
11275 # Release all node locks (the configuration has been updated)
11276 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11278 # and now perform the drbd attach
11279 self.lu.LogInfo("Attaching primary drbds to new secondary"
11280 " (standalone => connected)")
11281 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11283 self.node_secondary_ip,
11284 (self.instance.disks, self.instance),
11285 self.instance.name,
11287 for to_node, to_result in result.items():
11288 msg = to_result.fail_msg
11290 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11292 hint=("please do a gnt-instance info to see the"
11293 " status of disks"))
11295 cstep = itertools.count(5)
11297 if self.early_release:
11298 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11299 self._RemoveOldStorage(self.target_node, iv_names)
11300 # TODO: Check if releasing locks early still makes sense
11301 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11303 # Release all resource locks except those used by the instance
11304 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11305 keep=self.node_secondary_ip.keys())
11307 # TODO: Can the instance lock be downgraded here? Take the optional disk
11308 # shutdown in the caller into consideration.
11311 # This can fail as the old devices are degraded and _WaitForSync
11312 # does a combined result over all disks, so we don't check its return value
11313 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11314 _WaitForSync(self.lu, self.instance)
11316 # Check all devices manually
11317 self._CheckDevices(self.instance.primary_node, iv_names)
11319 # Step: remove old storage
11320 if not self.early_release:
11321 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11322 self._RemoveOldStorage(self.target_node, iv_names)
11325 class LURepairNodeStorage(NoHooksLU):
11326 """Repairs the volume group on a node.
11331 def CheckArguments(self):
11332 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11334 storage_type = self.op.storage_type
11336 if (constants.SO_FIX_CONSISTENCY not in
11337 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11338 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11339 " repaired" % storage_type,
11340 errors.ECODE_INVAL)
11342 def ExpandNames(self):
11343 self.needed_locks = {
11344 locking.LEVEL_NODE: [self.op.node_name],
11347 def _CheckFaultyDisks(self, instance, node_name):
11348 """Ensure faulty disks abort the opcode or at least warn."""
11350 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11352 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11353 " node '%s'" % (instance.name, node_name),
11354 errors.ECODE_STATE)
11355 except errors.OpPrereqError, err:
11356 if self.op.ignore_consistency:
11357 self.proc.LogWarning(str(err.args[0]))
11361 def CheckPrereq(self):
11362 """Check prerequisites.
11365 # Check whether any instance on this node has faulty disks
11366 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11367 if inst.admin_state != constants.ADMINST_UP:
11369 check_nodes = set(inst.all_nodes)
11370 check_nodes.discard(self.op.node_name)
11371 for inst_node_name in check_nodes:
11372 self._CheckFaultyDisks(inst, inst_node_name)
11374 def Exec(self, feedback_fn):
11375 feedback_fn("Repairing storage unit '%s' on %s ..." %
11376 (self.op.name, self.op.node_name))
11378 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11379 result = self.rpc.call_storage_execute(self.op.node_name,
11380 self.op.storage_type, st_args,
11382 constants.SO_FIX_CONSISTENCY)
11383 result.Raise("Failed to repair storage unit '%s' on %s" %
11384 (self.op.name, self.op.node_name))
11387 class LUNodeEvacuate(NoHooksLU):
11388 """Evacuates instances off a list of nodes.
11393 _MODE2IALLOCATOR = {
11394 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11395 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11396 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11398 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11399 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11400 constants.IALLOCATOR_NEVAC_MODES)
11402 def CheckArguments(self):
11403 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11405 def ExpandNames(self):
11406 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11408 if self.op.remote_node is not None:
11409 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11410 assert self.op.remote_node
11412 if self.op.remote_node == self.op.node_name:
11413 raise errors.OpPrereqError("Can not use evacuated node as a new"
11414 " secondary node", errors.ECODE_INVAL)
11416 if self.op.mode != constants.NODE_EVAC_SEC:
11417 raise errors.OpPrereqError("Without the use of an iallocator only"
11418 " secondary instances can be evacuated",
11419 errors.ECODE_INVAL)
11422 self.share_locks = _ShareAll()
11423 self.needed_locks = {
11424 locking.LEVEL_INSTANCE: [],
11425 locking.LEVEL_NODEGROUP: [],
11426 locking.LEVEL_NODE: [],
11429 # Determine nodes (via group) optimistically, needs verification once locks
11430 # have been acquired
11431 self.lock_nodes = self._DetermineNodes()
11433 def _DetermineNodes(self):
11434 """Gets the list of nodes to operate on.
11437 if self.op.remote_node is None:
11438 # Iallocator will choose any node(s) in the same group
11439 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11441 group_nodes = frozenset([self.op.remote_node])
11443 # Determine nodes to be locked
11444 return set([self.op.node_name]) | group_nodes
11446 def _DetermineInstances(self):
11447 """Builds list of instances to operate on.
11450 assert self.op.mode in constants.NODE_EVAC_MODES
11452 if self.op.mode == constants.NODE_EVAC_PRI:
11453 # Primary instances only
11454 inst_fn = _GetNodePrimaryInstances
11455 assert self.op.remote_node is None, \
11456 "Evacuating primary instances requires iallocator"
11457 elif self.op.mode == constants.NODE_EVAC_SEC:
11458 # Secondary instances only
11459 inst_fn = _GetNodeSecondaryInstances
11462 assert self.op.mode == constants.NODE_EVAC_ALL
11463 inst_fn = _GetNodeInstances
11464 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11466 raise errors.OpPrereqError("Due to an issue with the iallocator"
11467 " interface it is not possible to evacuate"
11468 " all instances at once; specify explicitly"
11469 " whether to evacuate primary or secondary"
11471 errors.ECODE_INVAL)
11473 return inst_fn(self.cfg, self.op.node_name)
11475 def DeclareLocks(self, level):
11476 if level == locking.LEVEL_INSTANCE:
11477 # Lock instances optimistically, needs verification once node and group
11478 # locks have been acquired
11479 self.needed_locks[locking.LEVEL_INSTANCE] = \
11480 set(i.name for i in self._DetermineInstances())
11482 elif level == locking.LEVEL_NODEGROUP:
11483 # Lock node groups for all potential target nodes optimistically, needs
11484 # verification once nodes have been acquired
11485 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11486 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11488 elif level == locking.LEVEL_NODE:
11489 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11491 def CheckPrereq(self):
11493 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11494 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11495 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11497 need_nodes = self._DetermineNodes()
11499 if not owned_nodes.issuperset(need_nodes):
11500 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11501 " locks were acquired, current nodes are"
11502 " are '%s', used to be '%s'; retry the"
11504 (self.op.node_name,
11505 utils.CommaJoin(need_nodes),
11506 utils.CommaJoin(owned_nodes)),
11507 errors.ECODE_STATE)
11509 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11510 if owned_groups != wanted_groups:
11511 raise errors.OpExecError("Node groups changed since locks were acquired,"
11512 " current groups are '%s', used to be '%s';"
11513 " retry the operation" %
11514 (utils.CommaJoin(wanted_groups),
11515 utils.CommaJoin(owned_groups)))
11517 # Determine affected instances
11518 self.instances = self._DetermineInstances()
11519 self.instance_names = [i.name for i in self.instances]
11521 if set(self.instance_names) != owned_instances:
11522 raise errors.OpExecError("Instances on node '%s' changed since locks"
11523 " were acquired, current instances are '%s',"
11524 " used to be '%s'; retry the operation" %
11525 (self.op.node_name,
11526 utils.CommaJoin(self.instance_names),
11527 utils.CommaJoin(owned_instances)))
11529 if self.instance_names:
11530 self.LogInfo("Evacuating instances from node '%s': %s",
11532 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11534 self.LogInfo("No instances to evacuate from node '%s'",
11537 if self.op.remote_node is not None:
11538 for i in self.instances:
11539 if i.primary_node == self.op.remote_node:
11540 raise errors.OpPrereqError("Node %s is the primary node of"
11541 " instance %s, cannot use it as"
11543 (self.op.remote_node, i.name),
11544 errors.ECODE_INVAL)
11546 def Exec(self, feedback_fn):
11547 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11549 if not self.instance_names:
11550 # No instances to evacuate
11553 elif self.op.iallocator is not None:
11554 # TODO: Implement relocation to other group
11555 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11556 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
11557 instances=list(self.instance_names))
11558 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11560 ial.Run(self.op.iallocator)
11562 if not ial.success:
11563 raise errors.OpPrereqError("Can't compute node evacuation using"
11564 " iallocator '%s': %s" %
11565 (self.op.iallocator, ial.info),
11566 errors.ECODE_NORES)
11568 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11570 elif self.op.remote_node is not None:
11571 assert self.op.mode == constants.NODE_EVAC_SEC
11573 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11574 remote_node=self.op.remote_node,
11576 mode=constants.REPLACE_DISK_CHG,
11577 early_release=self.op.early_release)]
11578 for instance_name in self.instance_names
11582 raise errors.ProgrammerError("No iallocator or remote node")
11584 return ResultWithJobs(jobs)
11587 def _SetOpEarlyRelease(early_release, op):
11588 """Sets C{early_release} flag on opcodes if available.
11592 op.early_release = early_release
11593 except AttributeError:
11594 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11599 def _NodeEvacDest(use_nodes, group, nodes):
11600 """Returns group or nodes depending on caller's choice.
11604 return utils.CommaJoin(nodes)
11609 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11610 """Unpacks the result of change-group and node-evacuate iallocator requests.
11612 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11613 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11615 @type lu: L{LogicalUnit}
11616 @param lu: Logical unit instance
11617 @type alloc_result: tuple/list
11618 @param alloc_result: Result from iallocator
11619 @type early_release: bool
11620 @param early_release: Whether to release locks early if possible
11621 @type use_nodes: bool
11622 @param use_nodes: Whether to display node names instead of groups
11625 (moved, failed, jobs) = alloc_result
11628 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11629 for (name, reason) in failed)
11630 lu.LogWarning("Unable to evacuate instances %s", failreason)
11631 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11634 lu.LogInfo("Instances to be moved: %s",
11635 utils.CommaJoin("%s (to %s)" %
11636 (name, _NodeEvacDest(use_nodes, group, nodes))
11637 for (name, group, nodes) in moved))
11639 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11640 map(opcodes.OpCode.LoadOpCode, ops))
11644 class LUInstanceGrowDisk(LogicalUnit):
11645 """Grow a disk of an instance.
11648 HPATH = "disk-grow"
11649 HTYPE = constants.HTYPE_INSTANCE
11652 def ExpandNames(self):
11653 self._ExpandAndLockInstance()
11654 self.needed_locks[locking.LEVEL_NODE] = []
11655 self.needed_locks[locking.LEVEL_NODE_RES] = []
11656 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11657 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11659 def DeclareLocks(self, level):
11660 if level == locking.LEVEL_NODE:
11661 self._LockInstancesNodes()
11662 elif level == locking.LEVEL_NODE_RES:
11664 self.needed_locks[locking.LEVEL_NODE_RES] = \
11665 self.needed_locks[locking.LEVEL_NODE][:]
11667 def BuildHooksEnv(self):
11668 """Build hooks env.
11670 This runs on the master, the primary and all the secondaries.
11674 "DISK": self.op.disk,
11675 "AMOUNT": self.op.amount,
11676 "ABSOLUTE": self.op.absolute,
11678 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11681 def BuildHooksNodes(self):
11682 """Build hooks nodes.
11685 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11688 def CheckPrereq(self):
11689 """Check prerequisites.
11691 This checks that the instance is in the cluster.
11694 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11695 assert instance is not None, \
11696 "Cannot retrieve locked instance %s" % self.op.instance_name
11697 nodenames = list(instance.all_nodes)
11698 for node in nodenames:
11699 _CheckNodeOnline(self, node)
11701 self.instance = instance
11703 if instance.disk_template not in constants.DTS_GROWABLE:
11704 raise errors.OpPrereqError("Instance's disk layout does not support"
11705 " growing", errors.ECODE_INVAL)
11707 self.disk = instance.FindDisk(self.op.disk)
11709 if self.op.absolute:
11710 self.target = self.op.amount
11711 self.delta = self.target - self.disk.size
11713 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11714 "current disk size (%s)" %
11715 (utils.FormatUnit(self.target, "h"),
11716 utils.FormatUnit(self.disk.size, "h")),
11717 errors.ECODE_STATE)
11719 self.delta = self.op.amount
11720 self.target = self.disk.size + self.delta
11722 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11723 utils.FormatUnit(self.delta, "h"),
11724 errors.ECODE_INVAL)
11726 if instance.disk_template not in (constants.DT_FILE,
11727 constants.DT_SHARED_FILE,
11729 # TODO: check the free disk space for file, when that feature will be
11731 _CheckNodesFreeDiskPerVG(self, nodenames,
11732 self.disk.ComputeGrowth(self.delta))
11734 def Exec(self, feedback_fn):
11735 """Execute disk grow.
11738 instance = self.instance
11741 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11742 assert (self.owned_locks(locking.LEVEL_NODE) ==
11743 self.owned_locks(locking.LEVEL_NODE_RES))
11745 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11747 raise errors.OpExecError("Cannot activate block device to grow")
11749 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11750 (self.op.disk, instance.name,
11751 utils.FormatUnit(self.delta, "h"),
11752 utils.FormatUnit(self.target, "h")))
11754 # First run all grow ops in dry-run mode
11755 for node in instance.all_nodes:
11756 self.cfg.SetDiskID(disk, node)
11757 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11759 result.Raise("Grow request failed to node %s" % node)
11761 # We know that (as far as we can test) operations across different
11762 # nodes will succeed, time to run it for real on the backing storage
11763 for node in instance.all_nodes:
11764 self.cfg.SetDiskID(disk, node)
11765 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11767 result.Raise("Grow request failed to node %s" % node)
11769 # And now execute it for logical storage, on the primary node
11770 node = instance.primary_node
11771 self.cfg.SetDiskID(disk, node)
11772 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11774 result.Raise("Grow request failed to node %s" % node)
11776 disk.RecordGrow(self.delta)
11777 self.cfg.Update(instance, feedback_fn)
11779 # Changes have been recorded, release node lock
11780 _ReleaseLocks(self, locking.LEVEL_NODE)
11782 # Downgrade lock while waiting for sync
11783 self.glm.downgrade(locking.LEVEL_INSTANCE)
11785 if self.op.wait_for_sync:
11786 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11788 self.proc.LogWarning("Disk sync-ing has not returned a good"
11789 " status; please check the instance")
11790 if instance.admin_state != constants.ADMINST_UP:
11791 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11792 elif instance.admin_state != constants.ADMINST_UP:
11793 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11794 " not supposed to be running because no wait for"
11795 " sync mode was requested")
11797 assert self.owned_locks(locking.LEVEL_NODE_RES)
11798 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11801 class LUInstanceQueryData(NoHooksLU):
11802 """Query runtime instance data.
11807 def ExpandNames(self):
11808 self.needed_locks = {}
11810 # Use locking if requested or when non-static information is wanted
11811 if not (self.op.static or self.op.use_locking):
11812 self.LogWarning("Non-static data requested, locks need to be acquired")
11813 self.op.use_locking = True
11815 if self.op.instances or not self.op.use_locking:
11816 # Expand instance names right here
11817 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11819 # Will use acquired locks
11820 self.wanted_names = None
11822 if self.op.use_locking:
11823 self.share_locks = _ShareAll()
11825 if self.wanted_names is None:
11826 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11828 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11830 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11831 self.needed_locks[locking.LEVEL_NODE] = []
11832 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11834 def DeclareLocks(self, level):
11835 if self.op.use_locking:
11836 if level == locking.LEVEL_NODEGROUP:
11837 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11839 # Lock all groups used by instances optimistically; this requires going
11840 # via the node before it's locked, requiring verification later on
11841 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11842 frozenset(group_uuid
11843 for instance_name in owned_instances
11845 self.cfg.GetInstanceNodeGroups(instance_name))
11847 elif level == locking.LEVEL_NODE:
11848 self._LockInstancesNodes()
11850 def CheckPrereq(self):
11851 """Check prerequisites.
11853 This only checks the optional instance list against the existing names.
11856 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11857 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11858 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11860 if self.wanted_names is None:
11861 assert self.op.use_locking, "Locking was not used"
11862 self.wanted_names = owned_instances
11864 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11866 if self.op.use_locking:
11867 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11870 assert not (owned_instances or owned_groups or owned_nodes)
11872 self.wanted_instances = instances.values()
11874 def _ComputeBlockdevStatus(self, node, instance, dev):
11875 """Returns the status of a block device
11878 if self.op.static or not node:
11881 self.cfg.SetDiskID(dev, node)
11883 result = self.rpc.call_blockdev_find(node, dev)
11887 result.Raise("Can't compute disk status for %s" % instance.name)
11889 status = result.payload
11893 return (status.dev_path, status.major, status.minor,
11894 status.sync_percent, status.estimated_time,
11895 status.is_degraded, status.ldisk_status)
11897 def _ComputeDiskStatus(self, instance, snode, dev):
11898 """Compute block device status.
11901 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11903 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11905 def _ComputeDiskStatusInner(self, instance, snode, dev):
11906 """Compute block device status.
11908 @attention: The device has to be annotated already.
11911 if dev.dev_type in constants.LDS_DRBD:
11912 # we change the snode then (otherwise we use the one passed in)
11913 if dev.logical_id[0] == instance.primary_node:
11914 snode = dev.logical_id[1]
11916 snode = dev.logical_id[0]
11918 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11920 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11923 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11930 "iv_name": dev.iv_name,
11931 "dev_type": dev.dev_type,
11932 "logical_id": dev.logical_id,
11933 "physical_id": dev.physical_id,
11934 "pstatus": dev_pstatus,
11935 "sstatus": dev_sstatus,
11936 "children": dev_children,
11941 def Exec(self, feedback_fn):
11942 """Gather and return data"""
11945 cluster = self.cfg.GetClusterInfo()
11947 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11948 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11950 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11951 for node in nodes.values()))
11953 group2name_fn = lambda uuid: groups[uuid].name
11955 for instance in self.wanted_instances:
11956 pnode = nodes[instance.primary_node]
11958 if self.op.static or pnode.offline:
11959 remote_state = None
11961 self.LogWarning("Primary node %s is marked offline, returning static"
11962 " information only for instance %s" %
11963 (pnode.name, instance.name))
11965 remote_info = self.rpc.call_instance_info(instance.primary_node,
11967 instance.hypervisor)
11968 remote_info.Raise("Error checking node %s" % instance.primary_node)
11969 remote_info = remote_info.payload
11970 if remote_info and "state" in remote_info:
11971 remote_state = "up"
11973 if instance.admin_state == constants.ADMINST_UP:
11974 remote_state = "down"
11976 remote_state = instance.admin_state
11978 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11981 snodes_group_uuids = [nodes[snode_name].group
11982 for snode_name in instance.secondary_nodes]
11984 result[instance.name] = {
11985 "name": instance.name,
11986 "config_state": instance.admin_state,
11987 "run_state": remote_state,
11988 "pnode": instance.primary_node,
11989 "pnode_group_uuid": pnode.group,
11990 "pnode_group_name": group2name_fn(pnode.group),
11991 "snodes": instance.secondary_nodes,
11992 "snodes_group_uuids": snodes_group_uuids,
11993 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11995 # this happens to be the same format used for hooks
11996 "nics": _NICListToTuple(self, instance.nics),
11997 "disk_template": instance.disk_template,
11999 "hypervisor": instance.hypervisor,
12000 "network_port": instance.network_port,
12001 "hv_instance": instance.hvparams,
12002 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12003 "be_instance": instance.beparams,
12004 "be_actual": cluster.FillBE(instance),
12005 "os_instance": instance.osparams,
12006 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12007 "serial_no": instance.serial_no,
12008 "mtime": instance.mtime,
12009 "ctime": instance.ctime,
12010 "uuid": instance.uuid,
12016 def PrepareContainerMods(mods, private_fn):
12017 """Prepares a list of container modifications by adding a private data field.
12019 @type mods: list of tuples; (operation, index, parameters)
12020 @param mods: List of modifications
12021 @type private_fn: callable or None
12022 @param private_fn: Callable for constructing a private data field for a
12027 if private_fn is None:
12032 return [(op, idx, params, fn()) for (op, idx, params) in mods]
12035 #: Type description for changes as returned by L{ApplyContainerMods}'s
12037 _TApplyContModsCbChanges = \
12038 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12039 ht.TNonEmptyString,
12044 def ApplyContainerMods(kind, container, chgdesc, mods,
12045 create_fn, modify_fn, remove_fn):
12046 """Applies descriptions in C{mods} to C{container}.
12049 @param kind: One-word item description
12050 @type container: list
12051 @param container: Container to modify
12052 @type chgdesc: None or list
12053 @param chgdesc: List of applied changes
12055 @param mods: Modifications as returned by L{PrepareContainerMods}
12056 @type create_fn: callable
12057 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12058 receives absolute item index, parameters and private data object as added
12059 by L{PrepareContainerMods}, returns tuple containing new item and changes
12061 @type modify_fn: callable
12062 @param modify_fn: Callback for modifying an existing item
12063 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12064 and private data object as added by L{PrepareContainerMods}, returns
12066 @type remove_fn: callable
12067 @param remove_fn: Callback on removing item; receives absolute item index,
12068 item and private data object as added by L{PrepareContainerMods}
12071 for (op, idx, params, private) in mods:
12074 absidx = len(container) - 1
12076 raise IndexError("Not accepting negative indices other than -1")
12077 elif idx > len(container):
12078 raise IndexError("Got %s index %s, but there are only %s" %
12079 (kind, idx, len(container)))
12085 if op == constants.DDM_ADD:
12086 # Calculate where item will be added
12088 addidx = len(container)
12092 if create_fn is None:
12095 (item, changes) = create_fn(addidx, params, private)
12098 container.append(item)
12101 assert idx <= len(container)
12102 # list.insert does so before the specified index
12103 container.insert(idx, item)
12105 # Retrieve existing item
12107 item = container[absidx]
12109 raise IndexError("Invalid %s index %s" % (kind, idx))
12111 if op == constants.DDM_REMOVE:
12114 if remove_fn is not None:
12115 remove_fn(absidx, item, private)
12117 changes = [("%s/%s" % (kind, absidx), "remove")]
12119 assert container[absidx] == item
12120 del container[absidx]
12121 elif op == constants.DDM_MODIFY:
12122 if modify_fn is not None:
12123 changes = modify_fn(absidx, item, params, private)
12125 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12127 assert _TApplyContModsCbChanges(changes)
12129 if not (chgdesc is None or changes is None):
12130 chgdesc.extend(changes)
12133 def _UpdateIvNames(base_index, disks):
12134 """Updates the C{iv_name} attribute of disks.
12136 @type disks: list of L{objects.Disk}
12139 for (idx, disk) in enumerate(disks):
12140 disk.iv_name = "disk/%s" % (base_index + idx, )
12143 class _InstNicModPrivate:
12144 """Data structure for network interface modifications.
12146 Used by L{LUInstanceSetParams}.
12149 def __init__(self):
12154 class LUInstanceSetParams(LogicalUnit):
12155 """Modifies an instances's parameters.
12158 HPATH = "instance-modify"
12159 HTYPE = constants.HTYPE_INSTANCE
12163 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12164 assert ht.TList(mods)
12165 assert not mods or len(mods[0]) in (2, 3)
12167 if mods and len(mods[0]) == 2:
12171 for op, params in mods:
12172 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12173 result.append((op, -1, params))
12177 raise errors.OpPrereqError("Only one %s add or remove operation is"
12178 " supported at a time" % kind,
12179 errors.ECODE_INVAL)
12181 result.append((constants.DDM_MODIFY, op, params))
12183 assert verify_fn(result)
12190 def _CheckMods(kind, mods, key_types, item_fn):
12191 """Ensures requested disk/NIC modifications are valid.
12194 for (op, _, params) in mods:
12195 assert ht.TDict(params)
12197 utils.ForceDictType(params, key_types)
12199 if op == constants.DDM_REMOVE:
12201 raise errors.OpPrereqError("No settings should be passed when"
12202 " removing a %s" % kind,
12203 errors.ECODE_INVAL)
12204 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12205 item_fn(op, params)
12207 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12210 def _VerifyDiskModification(op, params):
12211 """Verifies a disk modification.
12214 if op == constants.DDM_ADD:
12215 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12216 if mode not in constants.DISK_ACCESS_SET:
12217 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12218 errors.ECODE_INVAL)
12220 size = params.get(constants.IDISK_SIZE, None)
12222 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12223 constants.IDISK_SIZE, errors.ECODE_INVAL)
12227 except (TypeError, ValueError), err:
12228 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12229 errors.ECODE_INVAL)
12231 params[constants.IDISK_SIZE] = size
12233 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12234 raise errors.OpPrereqError("Disk size change not possible, use"
12235 " grow-disk", errors.ECODE_INVAL)
12238 def _VerifyNicModification(op, params):
12239 """Verifies a network interface modification.
12242 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12243 ip = params.get(constants.INIC_IP, None)
12246 elif ip.lower() == constants.VALUE_NONE:
12247 params[constants.INIC_IP] = None
12248 elif not netutils.IPAddress.IsValid(ip):
12249 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12250 errors.ECODE_INVAL)
12252 bridge = params.get("bridge", None)
12253 link = params.get(constants.INIC_LINK, None)
12254 if bridge and link:
12255 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12256 " at the same time", errors.ECODE_INVAL)
12257 elif bridge and bridge.lower() == constants.VALUE_NONE:
12258 params["bridge"] = None
12259 elif link and link.lower() == constants.VALUE_NONE:
12260 params[constants.INIC_LINK] = None
12262 if op == constants.DDM_ADD:
12263 macaddr = params.get(constants.INIC_MAC, None)
12264 if macaddr is None:
12265 params[constants.INIC_MAC] = constants.VALUE_AUTO
12267 if constants.INIC_MAC in params:
12268 macaddr = params[constants.INIC_MAC]
12269 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12270 macaddr = utils.NormalizeAndValidateMac(macaddr)
12272 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12273 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12274 " modifying an existing NIC",
12275 errors.ECODE_INVAL)
12277 def CheckArguments(self):
12278 if not (self.op.nics or self.op.disks or self.op.disk_template or
12279 self.op.hvparams or self.op.beparams or self.op.os_name or
12280 self.op.offline is not None or self.op.runtime_mem):
12281 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12283 if self.op.hvparams:
12284 _CheckGlobalHvParams(self.op.hvparams)
12286 self.op.disks = self._UpgradeDiskNicMods(
12287 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12288 self.op.nics = self._UpgradeDiskNicMods(
12289 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12291 # Check disk modifications
12292 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12293 self._VerifyDiskModification)
12295 if self.op.disks and self.op.disk_template is not None:
12296 raise errors.OpPrereqError("Disk template conversion and other disk"
12297 " changes not supported at the same time",
12298 errors.ECODE_INVAL)
12300 if (self.op.disk_template and
12301 self.op.disk_template in constants.DTS_INT_MIRROR and
12302 self.op.remote_node is None):
12303 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12304 " one requires specifying a secondary node",
12305 errors.ECODE_INVAL)
12307 # Check NIC modifications
12308 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12309 self._VerifyNicModification)
12311 def ExpandNames(self):
12312 self._ExpandAndLockInstance()
12313 # Can't even acquire node locks in shared mode as upcoming changes in
12314 # Ganeti 2.6 will start to modify the node object on disk conversion
12315 self.needed_locks[locking.LEVEL_NODE] = []
12316 self.needed_locks[locking.LEVEL_NODE_RES] = []
12317 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12319 def DeclareLocks(self, level):
12320 # TODO: Acquire group lock in shared mode (disk parameters)
12321 if level == locking.LEVEL_NODE:
12322 self._LockInstancesNodes()
12323 if self.op.disk_template and self.op.remote_node:
12324 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12325 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12326 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12328 self.needed_locks[locking.LEVEL_NODE_RES] = \
12329 self.needed_locks[locking.LEVEL_NODE][:]
12331 def BuildHooksEnv(self):
12332 """Build hooks env.
12334 This runs on the master, primary and secondaries.
12338 if constants.BE_MINMEM in self.be_new:
12339 args["minmem"] = self.be_new[constants.BE_MINMEM]
12340 if constants.BE_MAXMEM in self.be_new:
12341 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12342 if constants.BE_VCPUS in self.be_new:
12343 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12344 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12345 # information at all.
12347 if self._new_nics is not None:
12350 for nic in self._new_nics:
12351 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12352 mode = nicparams[constants.NIC_MODE]
12353 link = nicparams[constants.NIC_LINK]
12354 nics.append((nic.ip, nic.mac, mode, link))
12356 args["nics"] = nics
12358 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12359 if self.op.disk_template:
12360 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12361 if self.op.runtime_mem:
12362 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12366 def BuildHooksNodes(self):
12367 """Build hooks nodes.
12370 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12373 def _PrepareNicModification(self, params, private, old_ip, old_params,
12375 update_params_dict = dict([(key, params[key])
12376 for key in constants.NICS_PARAMETERS
12379 if "bridge" in params:
12380 update_params_dict[constants.NIC_LINK] = params["bridge"]
12382 new_params = _GetUpdatedParams(old_params, update_params_dict)
12383 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12385 new_filled_params = cluster.SimpleFillNIC(new_params)
12386 objects.NIC.CheckParameterSyntax(new_filled_params)
12388 new_mode = new_filled_params[constants.NIC_MODE]
12389 if new_mode == constants.NIC_MODE_BRIDGED:
12390 bridge = new_filled_params[constants.NIC_LINK]
12391 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12393 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12395 self.warn.append(msg)
12397 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12399 elif new_mode == constants.NIC_MODE_ROUTED:
12400 ip = params.get(constants.INIC_IP, old_ip)
12402 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12403 " on a routed NIC", errors.ECODE_INVAL)
12405 if constants.INIC_MAC in params:
12406 mac = params[constants.INIC_MAC]
12408 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12409 errors.ECODE_INVAL)
12410 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12411 # otherwise generate the MAC address
12412 params[constants.INIC_MAC] = \
12413 self.cfg.GenerateMAC(self.proc.GetECId())
12415 # or validate/reserve the current one
12417 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12418 except errors.ReservationError:
12419 raise errors.OpPrereqError("MAC address '%s' already in use"
12420 " in cluster" % mac,
12421 errors.ECODE_NOTUNIQUE)
12423 private.params = new_params
12424 private.filled = new_filled_params
12426 def CheckPrereq(self):
12427 """Check prerequisites.
12429 This only checks the instance list against the existing names.
12432 # checking the new params on the primary/secondary nodes
12434 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12435 cluster = self.cluster = self.cfg.GetClusterInfo()
12436 assert self.instance is not None, \
12437 "Cannot retrieve locked instance %s" % self.op.instance_name
12438 pnode = instance.primary_node
12439 nodelist = list(instance.all_nodes)
12440 pnode_info = self.cfg.GetNodeInfo(pnode)
12441 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12443 # Prepare disk/NIC modifications
12444 self.diskmod = PrepareContainerMods(self.op.disks, None)
12445 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12448 if self.op.os_name and not self.op.force:
12449 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12450 self.op.force_variant)
12451 instance_os = self.op.os_name
12453 instance_os = instance.os
12455 assert not (self.op.disk_template and self.op.disks), \
12456 "Can't modify disk template and apply disk changes at the same time"
12458 if self.op.disk_template:
12459 if instance.disk_template == self.op.disk_template:
12460 raise errors.OpPrereqError("Instance already has disk template %s" %
12461 instance.disk_template, errors.ECODE_INVAL)
12463 if (instance.disk_template,
12464 self.op.disk_template) not in self._DISK_CONVERSIONS:
12465 raise errors.OpPrereqError("Unsupported disk template conversion from"
12466 " %s to %s" % (instance.disk_template,
12467 self.op.disk_template),
12468 errors.ECODE_INVAL)
12469 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12470 msg="cannot change disk template")
12471 if self.op.disk_template in constants.DTS_INT_MIRROR:
12472 if self.op.remote_node == pnode:
12473 raise errors.OpPrereqError("Given new secondary node %s is the same"
12474 " as the primary node of the instance" %
12475 self.op.remote_node, errors.ECODE_STATE)
12476 _CheckNodeOnline(self, self.op.remote_node)
12477 _CheckNodeNotDrained(self, self.op.remote_node)
12478 # FIXME: here we assume that the old instance type is DT_PLAIN
12479 assert instance.disk_template == constants.DT_PLAIN
12480 disks = [{constants.IDISK_SIZE: d.size,
12481 constants.IDISK_VG: d.logical_id[0]}
12482 for d in instance.disks]
12483 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12484 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12486 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12487 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12488 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
12490 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12491 ignore=self.op.ignore_ipolicy)
12492 if pnode_info.group != snode_info.group:
12493 self.LogWarning("The primary and secondary nodes are in two"
12494 " different node groups; the disk parameters"
12495 " from the first disk's node group will be"
12498 # hvparams processing
12499 if self.op.hvparams:
12500 hv_type = instance.hypervisor
12501 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12502 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12503 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12506 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12507 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12508 self.hv_proposed = self.hv_new = hv_new # the new actual values
12509 self.hv_inst = i_hvdict # the new dict (without defaults)
12511 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12513 self.hv_new = self.hv_inst = {}
12515 # beparams processing
12516 if self.op.beparams:
12517 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12519 objects.UpgradeBeParams(i_bedict)
12520 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12521 be_new = cluster.SimpleFillBE(i_bedict)
12522 self.be_proposed = self.be_new = be_new # the new actual values
12523 self.be_inst = i_bedict # the new dict (without defaults)
12525 self.be_new = self.be_inst = {}
12526 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12527 be_old = cluster.FillBE(instance)
12529 # CPU param validation -- checking every time a parameter is
12530 # changed to cover all cases where either CPU mask or vcpus have
12532 if (constants.BE_VCPUS in self.be_proposed and
12533 constants.HV_CPU_MASK in self.hv_proposed):
12535 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12536 # Verify mask is consistent with number of vCPUs. Can skip this
12537 # test if only 1 entry in the CPU mask, which means same mask
12538 # is applied to all vCPUs.
12539 if (len(cpu_list) > 1 and
12540 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12541 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12543 (self.be_proposed[constants.BE_VCPUS],
12544 self.hv_proposed[constants.HV_CPU_MASK]),
12545 errors.ECODE_INVAL)
12547 # Only perform this test if a new CPU mask is given
12548 if constants.HV_CPU_MASK in self.hv_new:
12549 # Calculate the largest CPU number requested
12550 max_requested_cpu = max(map(max, cpu_list))
12551 # Check that all of the instance's nodes have enough physical CPUs to
12552 # satisfy the requested CPU mask
12553 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12554 max_requested_cpu + 1, instance.hypervisor)
12556 # osparams processing
12557 if self.op.osparams:
12558 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12559 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12560 self.os_inst = i_osdict # the new dict (without defaults)
12566 #TODO(dynmem): do the appropriate check involving MINMEM
12567 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12568 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12569 mem_check_list = [pnode]
12570 if be_new[constants.BE_AUTO_BALANCE]:
12571 # either we changed auto_balance to yes or it was from before
12572 mem_check_list.extend(instance.secondary_nodes)
12573 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12574 instance.hypervisor)
12575 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12576 [instance.hypervisor])
12577 pninfo = nodeinfo[pnode]
12578 msg = pninfo.fail_msg
12580 # Assume the primary node is unreachable and go ahead
12581 self.warn.append("Can't get info from primary node %s: %s" %
12584 (_, _, (pnhvinfo, )) = pninfo.payload
12585 if not isinstance(pnhvinfo.get("memory_free", None), int):
12586 self.warn.append("Node data from primary node %s doesn't contain"
12587 " free memory information" % pnode)
12588 elif instance_info.fail_msg:
12589 self.warn.append("Can't get instance runtime information: %s" %
12590 instance_info.fail_msg)
12592 if instance_info.payload:
12593 current_mem = int(instance_info.payload["memory"])
12595 # Assume instance not running
12596 # (there is a slight race condition here, but it's not very
12597 # probable, and we have no other way to check)
12598 # TODO: Describe race condition
12600 #TODO(dynmem): do the appropriate check involving MINMEM
12601 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12602 pnhvinfo["memory_free"])
12604 raise errors.OpPrereqError("This change will prevent the instance"
12605 " from starting, due to %d MB of memory"
12606 " missing on its primary node" %
12607 miss_mem, errors.ECODE_NORES)
12609 if be_new[constants.BE_AUTO_BALANCE]:
12610 for node, nres in nodeinfo.items():
12611 if node not in instance.secondary_nodes:
12613 nres.Raise("Can't get info from secondary node %s" % node,
12614 prereq=True, ecode=errors.ECODE_STATE)
12615 (_, _, (nhvinfo, )) = nres.payload
12616 if not isinstance(nhvinfo.get("memory_free", None), int):
12617 raise errors.OpPrereqError("Secondary node %s didn't return free"
12618 " memory information" % node,
12619 errors.ECODE_STATE)
12620 #TODO(dynmem): do the appropriate check involving MINMEM
12621 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12622 raise errors.OpPrereqError("This change will prevent the instance"
12623 " from failover to its secondary node"
12624 " %s, due to not enough memory" % node,
12625 errors.ECODE_STATE)
12627 if self.op.runtime_mem:
12628 remote_info = self.rpc.call_instance_info(instance.primary_node,
12630 instance.hypervisor)
12631 remote_info.Raise("Error checking node %s" % instance.primary_node)
12632 if not remote_info.payload: # not running already
12633 raise errors.OpPrereqError("Instance %s is not running" %
12634 instance.name, errors.ECODE_STATE)
12636 current_memory = remote_info.payload["memory"]
12637 if (not self.op.force and
12638 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12639 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12640 raise errors.OpPrereqError("Instance %s must have memory between %d"
12641 " and %d MB of memory unless --force is"
12644 self.be_proposed[constants.BE_MINMEM],
12645 self.be_proposed[constants.BE_MAXMEM]),
12646 errors.ECODE_INVAL)
12648 if self.op.runtime_mem > current_memory:
12649 _CheckNodeFreeMemory(self, instance.primary_node,
12650 "ballooning memory for instance %s" %
12652 self.op.memory - current_memory,
12653 instance.hypervisor)
12655 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12656 raise errors.OpPrereqError("Disk operations not supported for"
12657 " diskless instances", errors.ECODE_INVAL)
12659 def _PrepareNicCreate(_, params, private):
12660 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12661 return (None, None)
12663 def _PrepareNicMod(_, nic, params, private):
12664 self._PrepareNicModification(params, private, nic.ip,
12665 nic.nicparams, cluster, pnode)
12668 # Verify NIC changes (operating on copy)
12669 nics = instance.nics[:]
12670 ApplyContainerMods("NIC", nics, None, self.nicmod,
12671 _PrepareNicCreate, _PrepareNicMod, None)
12672 if len(nics) > constants.MAX_NICS:
12673 raise errors.OpPrereqError("Instance has too many network interfaces"
12674 " (%d), cannot add more" % constants.MAX_NICS,
12675 errors.ECODE_STATE)
12677 # Verify disk changes (operating on a copy)
12678 disks = instance.disks[:]
12679 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12680 if len(disks) > constants.MAX_DISKS:
12681 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12682 " more" % constants.MAX_DISKS,
12683 errors.ECODE_STATE)
12685 if self.op.offline is not None:
12686 if self.op.offline:
12687 msg = "can't change to offline"
12689 msg = "can't change to online"
12690 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12692 # Pre-compute NIC changes (necessary to use result in hooks)
12693 self._nic_chgdesc = []
12695 # Operate on copies as this is still in prereq
12696 nics = [nic.Copy() for nic in instance.nics]
12697 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12698 self._CreateNewNic, self._ApplyNicMods, None)
12699 self._new_nics = nics
12701 self._new_nics = None
12703 def _ConvertPlainToDrbd(self, feedback_fn):
12704 """Converts an instance from plain to drbd.
12707 feedback_fn("Converting template to drbd")
12708 instance = self.instance
12709 pnode = instance.primary_node
12710 snode = self.op.remote_node
12712 assert instance.disk_template == constants.DT_PLAIN
12714 # create a fake disk info for _GenerateDiskTemplate
12715 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12716 constants.IDISK_VG: d.logical_id[0]}
12717 for d in instance.disks]
12718 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12719 instance.name, pnode, [snode],
12720 disk_info, None, None, 0, feedback_fn,
12722 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12724 info = _GetInstanceInfoText(instance)
12725 feedback_fn("Creating additional volumes...")
12726 # first, create the missing data and meta devices
12727 for disk in anno_disks:
12728 # unfortunately this is... not too nice
12729 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12731 for child in disk.children:
12732 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12733 # at this stage, all new LVs have been created, we can rename the
12735 feedback_fn("Renaming original volumes...")
12736 rename_list = [(o, n.children[0].logical_id)
12737 for (o, n) in zip(instance.disks, new_disks)]
12738 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12739 result.Raise("Failed to rename original LVs")
12741 feedback_fn("Initializing DRBD devices...")
12742 # all child devices are in place, we can now create the DRBD devices
12743 for disk in anno_disks:
12744 for node in [pnode, snode]:
12745 f_create = node == pnode
12746 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12748 # at this point, the instance has been modified
12749 instance.disk_template = constants.DT_DRBD8
12750 instance.disks = new_disks
12751 self.cfg.Update(instance, feedback_fn)
12753 # Release node locks while waiting for sync
12754 _ReleaseLocks(self, locking.LEVEL_NODE)
12756 # disks are created, waiting for sync
12757 disk_abort = not _WaitForSync(self, instance,
12758 oneshot=not self.op.wait_for_sync)
12760 raise errors.OpExecError("There are some degraded disks for"
12761 " this instance, please cleanup manually")
12763 # Node resource locks will be released by caller
12765 def _ConvertDrbdToPlain(self, feedback_fn):
12766 """Converts an instance from drbd to plain.
12769 instance = self.instance
12771 assert len(instance.secondary_nodes) == 1
12772 assert instance.disk_template == constants.DT_DRBD8
12774 pnode = instance.primary_node
12775 snode = instance.secondary_nodes[0]
12776 feedback_fn("Converting template to plain")
12778 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12779 new_disks = [d.children[0] for d in instance.disks]
12781 # copy over size and mode
12782 for parent, child in zip(old_disks, new_disks):
12783 child.size = parent.size
12784 child.mode = parent.mode
12786 # this is a DRBD disk, return its port to the pool
12787 # NOTE: this must be done right before the call to cfg.Update!
12788 for disk in old_disks:
12789 tcp_port = disk.logical_id[2]
12790 self.cfg.AddTcpUdpPort(tcp_port)
12792 # update instance structure
12793 instance.disks = new_disks
12794 instance.disk_template = constants.DT_PLAIN
12795 self.cfg.Update(instance, feedback_fn)
12797 # Release locks in case removing disks takes a while
12798 _ReleaseLocks(self, locking.LEVEL_NODE)
12800 feedback_fn("Removing volumes on the secondary node...")
12801 for disk in old_disks:
12802 self.cfg.SetDiskID(disk, snode)
12803 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12805 self.LogWarning("Could not remove block device %s on node %s,"
12806 " continuing anyway: %s", disk.iv_name, snode, msg)
12808 feedback_fn("Removing unneeded volumes on the primary node...")
12809 for idx, disk in enumerate(old_disks):
12810 meta = disk.children[1]
12811 self.cfg.SetDiskID(meta, pnode)
12812 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12814 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12815 " continuing anyway: %s", idx, pnode, msg)
12817 def _CreateNewDisk(self, idx, params, _):
12818 """Creates a new disk.
12821 instance = self.instance
12824 if instance.disk_template in constants.DTS_FILEBASED:
12825 (file_driver, file_path) = instance.disks[0].logical_id
12826 file_path = os.path.dirname(file_path)
12828 file_driver = file_path = None
12831 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12832 instance.primary_node, instance.secondary_nodes,
12833 [params], file_path, file_driver, idx,
12834 self.Log, self.diskparams)[0]
12836 info = _GetInstanceInfoText(instance)
12838 logging.info("Creating volume %s for instance %s",
12839 disk.iv_name, instance.name)
12840 # Note: this needs to be kept in sync with _CreateDisks
12842 for node in instance.all_nodes:
12843 f_create = (node == instance.primary_node)
12845 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12846 except errors.OpExecError, err:
12847 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12848 disk.iv_name, disk, node, err)
12851 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12855 def _ModifyDisk(idx, disk, params, _):
12856 """Modifies a disk.
12859 disk.mode = params[constants.IDISK_MODE]
12862 ("disk.mode/%d" % idx, disk.mode),
12865 def _RemoveDisk(self, idx, root, _):
12869 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12870 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12871 self.cfg.SetDiskID(disk, node)
12872 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12874 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12875 " continuing anyway", idx, node, msg)
12877 # if this is a DRBD disk, return its port to the pool
12878 if root.dev_type in constants.LDS_DRBD:
12879 self.cfg.AddTcpUdpPort(root.logical_id[2])
12882 def _CreateNewNic(idx, params, private):
12883 """Creates data structure for a new network interface.
12886 mac = params[constants.INIC_MAC]
12887 ip = params.get(constants.INIC_IP, None)
12888 nicparams = private.params
12890 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12892 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12893 (mac, ip, private.filled[constants.NIC_MODE],
12894 private.filled[constants.NIC_LINK])),
12898 def _ApplyNicMods(idx, nic, params, private):
12899 """Modifies a network interface.
12904 for key in [constants.INIC_MAC, constants.INIC_IP]:
12906 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12907 setattr(nic, key, params[key])
12910 nic.nicparams = private.params
12912 for (key, val) in params.items():
12913 changes.append(("nic.%s/%d" % (key, idx), val))
12917 def Exec(self, feedback_fn):
12918 """Modifies an instance.
12920 All parameters take effect only at the next restart of the instance.
12923 # Process here the warnings from CheckPrereq, as we don't have a
12924 # feedback_fn there.
12925 # TODO: Replace with self.LogWarning
12926 for warn in self.warn:
12927 feedback_fn("WARNING: %s" % warn)
12929 assert ((self.op.disk_template is None) ^
12930 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12931 "Not owning any node resource locks"
12934 instance = self.instance
12937 if self.op.runtime_mem:
12938 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12940 self.op.runtime_mem)
12941 rpcres.Raise("Cannot modify instance runtime memory")
12942 result.append(("runtime_memory", self.op.runtime_mem))
12944 # Apply disk changes
12945 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12946 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12947 _UpdateIvNames(0, instance.disks)
12949 if self.op.disk_template:
12951 check_nodes = set(instance.all_nodes)
12952 if self.op.remote_node:
12953 check_nodes.add(self.op.remote_node)
12954 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12955 owned = self.owned_locks(level)
12956 assert not (check_nodes - owned), \
12957 ("Not owning the correct locks, owning %r, expected at least %r" %
12958 (owned, check_nodes))
12960 r_shut = _ShutdownInstanceDisks(self, instance)
12962 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12963 " proceed with disk template conversion")
12964 mode = (instance.disk_template, self.op.disk_template)
12966 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12968 self.cfg.ReleaseDRBDMinors(instance.name)
12970 result.append(("disk_template", self.op.disk_template))
12972 assert instance.disk_template == self.op.disk_template, \
12973 ("Expected disk template '%s', found '%s'" %
12974 (self.op.disk_template, instance.disk_template))
12976 # Release node and resource locks if there are any (they might already have
12977 # been released during disk conversion)
12978 _ReleaseLocks(self, locking.LEVEL_NODE)
12979 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12981 # Apply NIC changes
12982 if self._new_nics is not None:
12983 instance.nics = self._new_nics
12984 result.extend(self._nic_chgdesc)
12987 if self.op.hvparams:
12988 instance.hvparams = self.hv_inst
12989 for key, val in self.op.hvparams.iteritems():
12990 result.append(("hv/%s" % key, val))
12993 if self.op.beparams:
12994 instance.beparams = self.be_inst
12995 for key, val in self.op.beparams.iteritems():
12996 result.append(("be/%s" % key, val))
12999 if self.op.os_name:
13000 instance.os = self.op.os_name
13003 if self.op.osparams:
13004 instance.osparams = self.os_inst
13005 for key, val in self.op.osparams.iteritems():
13006 result.append(("os/%s" % key, val))
13008 if self.op.offline is None:
13011 elif self.op.offline:
13012 # Mark instance as offline
13013 self.cfg.MarkInstanceOffline(instance.name)
13014 result.append(("admin_state", constants.ADMINST_OFFLINE))
13016 # Mark instance as online, but stopped
13017 self.cfg.MarkInstanceDown(instance.name)
13018 result.append(("admin_state", constants.ADMINST_DOWN))
13020 self.cfg.Update(instance, feedback_fn)
13022 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13023 self.owned_locks(locking.LEVEL_NODE)), \
13024 "All node locks should have been released by now"
13028 _DISK_CONVERSIONS = {
13029 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13030 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13034 class LUInstanceChangeGroup(LogicalUnit):
13035 HPATH = "instance-change-group"
13036 HTYPE = constants.HTYPE_INSTANCE
13039 def ExpandNames(self):
13040 self.share_locks = _ShareAll()
13041 self.needed_locks = {
13042 locking.LEVEL_NODEGROUP: [],
13043 locking.LEVEL_NODE: [],
13046 self._ExpandAndLockInstance()
13048 if self.op.target_groups:
13049 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13050 self.op.target_groups)
13052 self.req_target_uuids = None
13054 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13056 def DeclareLocks(self, level):
13057 if level == locking.LEVEL_NODEGROUP:
13058 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13060 if self.req_target_uuids:
13061 lock_groups = set(self.req_target_uuids)
13063 # Lock all groups used by instance optimistically; this requires going
13064 # via the node before it's locked, requiring verification later on
13065 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13066 lock_groups.update(instance_groups)
13068 # No target groups, need to lock all of them
13069 lock_groups = locking.ALL_SET
13071 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13073 elif level == locking.LEVEL_NODE:
13074 if self.req_target_uuids:
13075 # Lock all nodes used by instances
13076 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13077 self._LockInstancesNodes()
13079 # Lock all nodes in all potential target groups
13080 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13081 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13082 member_nodes = [node_name
13083 for group in lock_groups
13084 for node_name in self.cfg.GetNodeGroup(group).members]
13085 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13087 # Lock all nodes as all groups are potential targets
13088 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13090 def CheckPrereq(self):
13091 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13092 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13093 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13095 assert (self.req_target_uuids is None or
13096 owned_groups.issuperset(self.req_target_uuids))
13097 assert owned_instances == set([self.op.instance_name])
13099 # Get instance information
13100 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13102 # Check if node groups for locked instance are still correct
13103 assert owned_nodes.issuperset(self.instance.all_nodes), \
13104 ("Instance %s's nodes changed while we kept the lock" %
13105 self.op.instance_name)
13107 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13110 if self.req_target_uuids:
13111 # User requested specific target groups
13112 self.target_uuids = frozenset(self.req_target_uuids)
13114 # All groups except those used by the instance are potential targets
13115 self.target_uuids = owned_groups - inst_groups
13117 conflicting_groups = self.target_uuids & inst_groups
13118 if conflicting_groups:
13119 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13120 " used by the instance '%s'" %
13121 (utils.CommaJoin(conflicting_groups),
13122 self.op.instance_name),
13123 errors.ECODE_INVAL)
13125 if not self.target_uuids:
13126 raise errors.OpPrereqError("There are no possible target groups",
13127 errors.ECODE_INVAL)
13129 def BuildHooksEnv(self):
13130 """Build hooks env.
13133 assert self.target_uuids
13136 "TARGET_GROUPS": " ".join(self.target_uuids),
13139 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13143 def BuildHooksNodes(self):
13144 """Build hooks nodes.
13147 mn = self.cfg.GetMasterNode()
13148 return ([mn], [mn])
13150 def Exec(self, feedback_fn):
13151 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13153 assert instances == [self.op.instance_name], "Instance not locked"
13155 req = iallocator.IAReqGroupChange(instances=instances,
13156 target_groups=list(self.target_uuids))
13157 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13159 ial.Run(self.op.iallocator)
13161 if not ial.success:
13162 raise errors.OpPrereqError("Can't compute solution for changing group of"
13163 " instance '%s' using iallocator '%s': %s" %
13164 (self.op.instance_name, self.op.iallocator,
13165 ial.info), errors.ECODE_NORES)
13167 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13169 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13170 " instance '%s'", len(jobs), self.op.instance_name)
13172 return ResultWithJobs(jobs)
13175 class LUBackupQuery(NoHooksLU):
13176 """Query the exports list
13181 def CheckArguments(self):
13182 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13183 ["node", "export"], self.op.use_locking)
13185 def ExpandNames(self):
13186 self.expq.ExpandNames(self)
13188 def DeclareLocks(self, level):
13189 self.expq.DeclareLocks(self, level)
13191 def Exec(self, feedback_fn):
13194 for (node, expname) in self.expq.OldStyleQuery(self):
13195 if expname is None:
13196 result[node] = False
13198 result.setdefault(node, []).append(expname)
13203 class _ExportQuery(_QueryBase):
13204 FIELDS = query.EXPORT_FIELDS
13206 #: The node name is not a unique key for this query
13207 SORT_FIELD = "node"
13209 def ExpandNames(self, lu):
13210 lu.needed_locks = {}
13212 # The following variables interact with _QueryBase._GetNames
13214 self.wanted = _GetWantedNodes(lu, self.names)
13216 self.wanted = locking.ALL_SET
13218 self.do_locking = self.use_locking
13220 if self.do_locking:
13221 lu.share_locks = _ShareAll()
13222 lu.needed_locks = {
13223 locking.LEVEL_NODE: self.wanted,
13226 def DeclareLocks(self, lu, level):
13229 def _GetQueryData(self, lu):
13230 """Computes the list of nodes and their attributes.
13233 # Locking is not used
13235 assert not (compat.any(lu.glm.is_owned(level)
13236 for level in locking.LEVELS
13237 if level != locking.LEVEL_CLUSTER) or
13238 self.do_locking or self.use_locking)
13240 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13244 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13246 result.append((node, None))
13248 result.extend((node, expname) for expname in nres.payload)
13253 class LUBackupPrepare(NoHooksLU):
13254 """Prepares an instance for an export and returns useful information.
13259 def ExpandNames(self):
13260 self._ExpandAndLockInstance()
13262 def CheckPrereq(self):
13263 """Check prerequisites.
13266 instance_name = self.op.instance_name
13268 self.instance = self.cfg.GetInstanceInfo(instance_name)
13269 assert self.instance is not None, \
13270 "Cannot retrieve locked instance %s" % self.op.instance_name
13271 _CheckNodeOnline(self, self.instance.primary_node)
13273 self._cds = _GetClusterDomainSecret()
13275 def Exec(self, feedback_fn):
13276 """Prepares an instance for an export.
13279 instance = self.instance
13281 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13282 salt = utils.GenerateSecret(8)
13284 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13285 result = self.rpc.call_x509_cert_create(instance.primary_node,
13286 constants.RIE_CERT_VALIDITY)
13287 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13289 (name, cert_pem) = result.payload
13291 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13295 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13296 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13298 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13304 class LUBackupExport(LogicalUnit):
13305 """Export an instance to an image in the cluster.
13308 HPATH = "instance-export"
13309 HTYPE = constants.HTYPE_INSTANCE
13312 def CheckArguments(self):
13313 """Check the arguments.
13316 self.x509_key_name = self.op.x509_key_name
13317 self.dest_x509_ca_pem = self.op.destination_x509_ca
13319 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13320 if not self.x509_key_name:
13321 raise errors.OpPrereqError("Missing X509 key name for encryption",
13322 errors.ECODE_INVAL)
13324 if not self.dest_x509_ca_pem:
13325 raise errors.OpPrereqError("Missing destination X509 CA",
13326 errors.ECODE_INVAL)
13328 def ExpandNames(self):
13329 self._ExpandAndLockInstance()
13331 # Lock all nodes for local exports
13332 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13333 # FIXME: lock only instance primary and destination node
13335 # Sad but true, for now we have do lock all nodes, as we don't know where
13336 # the previous export might be, and in this LU we search for it and
13337 # remove it from its current node. In the future we could fix this by:
13338 # - making a tasklet to search (share-lock all), then create the
13339 # new one, then one to remove, after
13340 # - removing the removal operation altogether
13341 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13343 def DeclareLocks(self, level):
13344 """Last minute lock declaration."""
13345 # All nodes are locked anyway, so nothing to do here.
13347 def BuildHooksEnv(self):
13348 """Build hooks env.
13350 This will run on the master, primary node and target node.
13354 "EXPORT_MODE": self.op.mode,
13355 "EXPORT_NODE": self.op.target_node,
13356 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13357 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13358 # TODO: Generic function for boolean env variables
13359 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13362 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13366 def BuildHooksNodes(self):
13367 """Build hooks nodes.
13370 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13372 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13373 nl.append(self.op.target_node)
13377 def CheckPrereq(self):
13378 """Check prerequisites.
13380 This checks that the instance and node names are valid.
13383 instance_name = self.op.instance_name
13385 self.instance = self.cfg.GetInstanceInfo(instance_name)
13386 assert self.instance is not None, \
13387 "Cannot retrieve locked instance %s" % self.op.instance_name
13388 _CheckNodeOnline(self, self.instance.primary_node)
13390 if (self.op.remove_instance and
13391 self.instance.admin_state == constants.ADMINST_UP and
13392 not self.op.shutdown):
13393 raise errors.OpPrereqError("Can not remove instance without shutting it"
13394 " down before", errors.ECODE_STATE)
13396 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13397 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13398 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13399 assert self.dst_node is not None
13401 _CheckNodeOnline(self, self.dst_node.name)
13402 _CheckNodeNotDrained(self, self.dst_node.name)
13405 self.dest_disk_info = None
13406 self.dest_x509_ca = None
13408 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13409 self.dst_node = None
13411 if len(self.op.target_node) != len(self.instance.disks):
13412 raise errors.OpPrereqError(("Received destination information for %s"
13413 " disks, but instance %s has %s disks") %
13414 (len(self.op.target_node), instance_name,
13415 len(self.instance.disks)),
13416 errors.ECODE_INVAL)
13418 cds = _GetClusterDomainSecret()
13420 # Check X509 key name
13422 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13423 except (TypeError, ValueError), err:
13424 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13425 errors.ECODE_INVAL)
13427 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13428 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13429 errors.ECODE_INVAL)
13431 # Load and verify CA
13433 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13434 except OpenSSL.crypto.Error, err:
13435 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13436 (err, ), errors.ECODE_INVAL)
13438 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13439 if errcode is not None:
13440 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13441 (msg, ), errors.ECODE_INVAL)
13443 self.dest_x509_ca = cert
13445 # Verify target information
13447 for idx, disk_data in enumerate(self.op.target_node):
13449 (host, port, magic) = \
13450 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13451 except errors.GenericError, err:
13452 raise errors.OpPrereqError("Target info for disk %s: %s" %
13453 (idx, err), errors.ECODE_INVAL)
13455 disk_info.append((host, port, magic))
13457 assert len(disk_info) == len(self.op.target_node)
13458 self.dest_disk_info = disk_info
13461 raise errors.ProgrammerError("Unhandled export mode %r" %
13464 # instance disk type verification
13465 # TODO: Implement export support for file-based disks
13466 for disk in self.instance.disks:
13467 if disk.dev_type == constants.LD_FILE:
13468 raise errors.OpPrereqError("Export not supported for instances with"
13469 " file-based disks", errors.ECODE_INVAL)
13471 def _CleanupExports(self, feedback_fn):
13472 """Removes exports of current instance from all other nodes.
13474 If an instance in a cluster with nodes A..D was exported to node C, its
13475 exports will be removed from the nodes A, B and D.
13478 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13480 nodelist = self.cfg.GetNodeList()
13481 nodelist.remove(self.dst_node.name)
13483 # on one-node clusters nodelist will be empty after the removal
13484 # if we proceed the backup would be removed because OpBackupQuery
13485 # substitutes an empty list with the full cluster node list.
13486 iname = self.instance.name
13488 feedback_fn("Removing old exports for instance %s" % iname)
13489 exportlist = self.rpc.call_export_list(nodelist)
13490 for node in exportlist:
13491 if exportlist[node].fail_msg:
13493 if iname in exportlist[node].payload:
13494 msg = self.rpc.call_export_remove(node, iname).fail_msg
13496 self.LogWarning("Could not remove older export for instance %s"
13497 " on node %s: %s", iname, node, msg)
13499 def Exec(self, feedback_fn):
13500 """Export an instance to an image in the cluster.
13503 assert self.op.mode in constants.EXPORT_MODES
13505 instance = self.instance
13506 src_node = instance.primary_node
13508 if self.op.shutdown:
13509 # shutdown the instance, but not the disks
13510 feedback_fn("Shutting down instance %s" % instance.name)
13511 result = self.rpc.call_instance_shutdown(src_node, instance,
13512 self.op.shutdown_timeout)
13513 # TODO: Maybe ignore failures if ignore_remove_failures is set
13514 result.Raise("Could not shutdown instance %s on"
13515 " node %s" % (instance.name, src_node))
13517 # set the disks ID correctly since call_instance_start needs the
13518 # correct drbd minor to create the symlinks
13519 for disk in instance.disks:
13520 self.cfg.SetDiskID(disk, src_node)
13522 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13525 # Activate the instance disks if we'exporting a stopped instance
13526 feedback_fn("Activating disks for %s" % instance.name)
13527 _StartInstanceDisks(self, instance, None)
13530 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13533 helper.CreateSnapshots()
13535 if (self.op.shutdown and
13536 instance.admin_state == constants.ADMINST_UP and
13537 not self.op.remove_instance):
13538 assert not activate_disks
13539 feedback_fn("Starting instance %s" % instance.name)
13540 result = self.rpc.call_instance_start(src_node,
13541 (instance, None, None), False)
13542 msg = result.fail_msg
13544 feedback_fn("Failed to start instance: %s" % msg)
13545 _ShutdownInstanceDisks(self, instance)
13546 raise errors.OpExecError("Could not start instance: %s" % msg)
13548 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13549 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13550 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13551 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13552 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13554 (key_name, _, _) = self.x509_key_name
13557 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13560 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13561 key_name, dest_ca_pem,
13566 # Check for backwards compatibility
13567 assert len(dresults) == len(instance.disks)
13568 assert compat.all(isinstance(i, bool) for i in dresults), \
13569 "Not all results are boolean: %r" % dresults
13573 feedback_fn("Deactivating disks for %s" % instance.name)
13574 _ShutdownInstanceDisks(self, instance)
13576 if not (compat.all(dresults) and fin_resu):
13579 failures.append("export finalization")
13580 if not compat.all(dresults):
13581 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13583 failures.append("disk export: disk(s) %s" % fdsk)
13585 raise errors.OpExecError("Export failed, errors in %s" %
13586 utils.CommaJoin(failures))
13588 # At this point, the export was successful, we can cleanup/finish
13590 # Remove instance if requested
13591 if self.op.remove_instance:
13592 feedback_fn("Removing instance %s" % instance.name)
13593 _RemoveInstance(self, feedback_fn, instance,
13594 self.op.ignore_remove_failures)
13596 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13597 self._CleanupExports(feedback_fn)
13599 return fin_resu, dresults
13602 class LUBackupRemove(NoHooksLU):
13603 """Remove exports related to the named instance.
13608 def ExpandNames(self):
13609 self.needed_locks = {}
13610 # We need all nodes to be locked in order for RemoveExport to work, but we
13611 # don't need to lock the instance itself, as nothing will happen to it (and
13612 # we can remove exports also for a removed instance)
13613 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13615 def Exec(self, feedback_fn):
13616 """Remove any export.
13619 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13620 # If the instance was not found we'll try with the name that was passed in.
13621 # This will only work if it was an FQDN, though.
13623 if not instance_name:
13625 instance_name = self.op.instance_name
13627 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13628 exportlist = self.rpc.call_export_list(locked_nodes)
13630 for node in exportlist:
13631 msg = exportlist[node].fail_msg
13633 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13635 if instance_name in exportlist[node].payload:
13637 result = self.rpc.call_export_remove(node, instance_name)
13638 msg = result.fail_msg
13640 logging.error("Could not remove export for instance %s"
13641 " on node %s: %s", instance_name, node, msg)
13643 if fqdn_warn and not found:
13644 feedback_fn("Export not found. If trying to remove an export belonging"
13645 " to a deleted instance please use its Fully Qualified"
13649 class LUGroupAdd(LogicalUnit):
13650 """Logical unit for creating node groups.
13653 HPATH = "group-add"
13654 HTYPE = constants.HTYPE_GROUP
13657 def ExpandNames(self):
13658 # We need the new group's UUID here so that we can create and acquire the
13659 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13660 # that it should not check whether the UUID exists in the configuration.
13661 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13662 self.needed_locks = {}
13663 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13665 def CheckPrereq(self):
13666 """Check prerequisites.
13668 This checks that the given group name is not an existing node group
13673 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13674 except errors.OpPrereqError:
13677 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13678 " node group (UUID: %s)" %
13679 (self.op.group_name, existing_uuid),
13680 errors.ECODE_EXISTS)
13682 if self.op.ndparams:
13683 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13685 if self.op.hv_state:
13686 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13688 self.new_hv_state = None
13690 if self.op.disk_state:
13691 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13693 self.new_disk_state = None
13695 if self.op.diskparams:
13696 for templ in constants.DISK_TEMPLATES:
13697 if templ in self.op.diskparams:
13698 utils.ForceDictType(self.op.diskparams[templ],
13699 constants.DISK_DT_TYPES)
13700 self.new_diskparams = self.op.diskparams
13702 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13703 except errors.OpPrereqError, err:
13704 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13705 errors.ECODE_INVAL)
13707 self.new_diskparams = {}
13709 if self.op.ipolicy:
13710 cluster = self.cfg.GetClusterInfo()
13711 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13713 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13714 except errors.ConfigurationError, err:
13715 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13716 errors.ECODE_INVAL)
13718 def BuildHooksEnv(self):
13719 """Build hooks env.
13723 "GROUP_NAME": self.op.group_name,
13726 def BuildHooksNodes(self):
13727 """Build hooks nodes.
13730 mn = self.cfg.GetMasterNode()
13731 return ([mn], [mn])
13733 def Exec(self, feedback_fn):
13734 """Add the node group to the cluster.
13737 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13738 uuid=self.group_uuid,
13739 alloc_policy=self.op.alloc_policy,
13740 ndparams=self.op.ndparams,
13741 diskparams=self.new_diskparams,
13742 ipolicy=self.op.ipolicy,
13743 hv_state_static=self.new_hv_state,
13744 disk_state_static=self.new_disk_state)
13746 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13747 del self.remove_locks[locking.LEVEL_NODEGROUP]
13750 class LUGroupAssignNodes(NoHooksLU):
13751 """Logical unit for assigning nodes to groups.
13756 def ExpandNames(self):
13757 # These raise errors.OpPrereqError on their own:
13758 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13759 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13761 # We want to lock all the affected nodes and groups. We have readily
13762 # available the list of nodes, and the *destination* group. To gather the
13763 # list of "source" groups, we need to fetch node information later on.
13764 self.needed_locks = {
13765 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13766 locking.LEVEL_NODE: self.op.nodes,
13769 def DeclareLocks(self, level):
13770 if level == locking.LEVEL_NODEGROUP:
13771 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13773 # Try to get all affected nodes' groups without having the group or node
13774 # lock yet. Needs verification later in the code flow.
13775 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13777 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13779 def CheckPrereq(self):
13780 """Check prerequisites.
13783 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13784 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13785 frozenset(self.op.nodes))
13787 expected_locks = (set([self.group_uuid]) |
13788 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13789 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13790 if actual_locks != expected_locks:
13791 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13792 " current groups are '%s', used to be '%s'" %
13793 (utils.CommaJoin(expected_locks),
13794 utils.CommaJoin(actual_locks)))
13796 self.node_data = self.cfg.GetAllNodesInfo()
13797 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13798 instance_data = self.cfg.GetAllInstancesInfo()
13800 if self.group is None:
13801 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13802 (self.op.group_name, self.group_uuid))
13804 (new_splits, previous_splits) = \
13805 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13806 for node in self.op.nodes],
13807 self.node_data, instance_data)
13810 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13812 if not self.op.force:
13813 raise errors.OpExecError("The following instances get split by this"
13814 " change and --force was not given: %s" %
13817 self.LogWarning("This operation will split the following instances: %s",
13820 if previous_splits:
13821 self.LogWarning("In addition, these already-split instances continue"
13822 " to be split across groups: %s",
13823 utils.CommaJoin(utils.NiceSort(previous_splits)))
13825 def Exec(self, feedback_fn):
13826 """Assign nodes to a new group.
13829 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13831 self.cfg.AssignGroupNodes(mods)
13834 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13835 """Check for split instances after a node assignment.
13837 This method considers a series of node assignments as an atomic operation,
13838 and returns information about split instances after applying the set of
13841 In particular, it returns information about newly split instances, and
13842 instances that were already split, and remain so after the change.
13844 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13847 @type changes: list of (node_name, new_group_uuid) pairs.
13848 @param changes: list of node assignments to consider.
13849 @param node_data: a dict with data for all nodes
13850 @param instance_data: a dict with all instances to consider
13851 @rtype: a two-tuple
13852 @return: a list of instances that were previously okay and result split as a
13853 consequence of this change, and a list of instances that were previously
13854 split and this change does not fix.
13857 changed_nodes = dict((node, group) for node, group in changes
13858 if node_data[node].group != group)
13860 all_split_instances = set()
13861 previously_split_instances = set()
13863 def InstanceNodes(instance):
13864 return [instance.primary_node] + list(instance.secondary_nodes)
13866 for inst in instance_data.values():
13867 if inst.disk_template not in constants.DTS_INT_MIRROR:
13870 instance_nodes = InstanceNodes(inst)
13872 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13873 previously_split_instances.add(inst.name)
13875 if len(set(changed_nodes.get(node, node_data[node].group)
13876 for node in instance_nodes)) > 1:
13877 all_split_instances.add(inst.name)
13879 return (list(all_split_instances - previously_split_instances),
13880 list(previously_split_instances & all_split_instances))
13883 class _GroupQuery(_QueryBase):
13884 FIELDS = query.GROUP_FIELDS
13886 def ExpandNames(self, lu):
13887 lu.needed_locks = {}
13889 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13890 self._cluster = lu.cfg.GetClusterInfo()
13891 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13894 self.wanted = [name_to_uuid[name]
13895 for name in utils.NiceSort(name_to_uuid.keys())]
13897 # Accept names to be either names or UUIDs.
13900 all_uuid = frozenset(self._all_groups.keys())
13902 for name in self.names:
13903 if name in all_uuid:
13904 self.wanted.append(name)
13905 elif name in name_to_uuid:
13906 self.wanted.append(name_to_uuid[name])
13908 missing.append(name)
13911 raise errors.OpPrereqError("Some groups do not exist: %s" %
13912 utils.CommaJoin(missing),
13913 errors.ECODE_NOENT)
13915 def DeclareLocks(self, lu, level):
13918 def _GetQueryData(self, lu):
13919 """Computes the list of node groups and their attributes.
13922 do_nodes = query.GQ_NODE in self.requested_data
13923 do_instances = query.GQ_INST in self.requested_data
13925 group_to_nodes = None
13926 group_to_instances = None
13928 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13929 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13930 # latter GetAllInstancesInfo() is not enough, for we have to go through
13931 # instance->node. Hence, we will need to process nodes even if we only need
13932 # instance information.
13933 if do_nodes or do_instances:
13934 all_nodes = lu.cfg.GetAllNodesInfo()
13935 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13938 for node in all_nodes.values():
13939 if node.group in group_to_nodes:
13940 group_to_nodes[node.group].append(node.name)
13941 node_to_group[node.name] = node.group
13944 all_instances = lu.cfg.GetAllInstancesInfo()
13945 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13947 for instance in all_instances.values():
13948 node = instance.primary_node
13949 if node in node_to_group:
13950 group_to_instances[node_to_group[node]].append(instance.name)
13953 # Do not pass on node information if it was not requested.
13954 group_to_nodes = None
13956 return query.GroupQueryData(self._cluster,
13957 [self._all_groups[uuid]
13958 for uuid in self.wanted],
13959 group_to_nodes, group_to_instances,
13960 query.GQ_DISKPARAMS in self.requested_data)
13963 class LUGroupQuery(NoHooksLU):
13964 """Logical unit for querying node groups.
13969 def CheckArguments(self):
13970 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13971 self.op.output_fields, False)
13973 def ExpandNames(self):
13974 self.gq.ExpandNames(self)
13976 def DeclareLocks(self, level):
13977 self.gq.DeclareLocks(self, level)
13979 def Exec(self, feedback_fn):
13980 return self.gq.OldStyleQuery(self)
13983 class LUGroupSetParams(LogicalUnit):
13984 """Modifies the parameters of a node group.
13987 HPATH = "group-modify"
13988 HTYPE = constants.HTYPE_GROUP
13991 def CheckArguments(self):
13994 self.op.diskparams,
13995 self.op.alloc_policy,
13997 self.op.disk_state,
14001 if all_changes.count(None) == len(all_changes):
14002 raise errors.OpPrereqError("Please pass at least one modification",
14003 errors.ECODE_INVAL)
14005 def ExpandNames(self):
14006 # This raises errors.OpPrereqError on its own:
14007 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14009 self.needed_locks = {
14010 locking.LEVEL_INSTANCE: [],
14011 locking.LEVEL_NODEGROUP: [self.group_uuid],
14014 self.share_locks[locking.LEVEL_INSTANCE] = 1
14016 def DeclareLocks(self, level):
14017 if level == locking.LEVEL_INSTANCE:
14018 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14020 # Lock instances optimistically, needs verification once group lock has
14022 self.needed_locks[locking.LEVEL_INSTANCE] = \
14023 self.cfg.GetNodeGroupInstances(self.group_uuid)
14026 def _UpdateAndVerifyDiskParams(old, new):
14027 """Updates and verifies disk parameters.
14030 new_params = _GetUpdatedParams(old, new)
14031 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14034 def CheckPrereq(self):
14035 """Check prerequisites.
14038 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14040 # Check if locked instances are still correct
14041 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14043 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14044 cluster = self.cfg.GetClusterInfo()
14046 if self.group is None:
14047 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14048 (self.op.group_name, self.group_uuid))
14050 if self.op.ndparams:
14051 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14052 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14053 self.new_ndparams = new_ndparams
14055 if self.op.diskparams:
14056 diskparams = self.group.diskparams
14057 uavdp = self._UpdateAndVerifyDiskParams
14058 # For each disktemplate subdict update and verify the values
14059 new_diskparams = dict((dt,
14060 uavdp(diskparams.get(dt, {}),
14061 self.op.diskparams[dt]))
14062 for dt in constants.DISK_TEMPLATES
14063 if dt in self.op.diskparams)
14064 # As we've all subdicts of diskparams ready, lets merge the actual
14065 # dict with all updated subdicts
14066 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14068 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14069 except errors.OpPrereqError, err:
14070 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14071 errors.ECODE_INVAL)
14073 if self.op.hv_state:
14074 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14075 self.group.hv_state_static)
14077 if self.op.disk_state:
14078 self.new_disk_state = \
14079 _MergeAndVerifyDiskState(self.op.disk_state,
14080 self.group.disk_state_static)
14082 if self.op.ipolicy:
14083 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14087 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14088 inst_filter = lambda inst: inst.name in owned_instances
14089 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14090 gmi = ganeti.masterd.instance
14092 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14094 new_ipolicy, instances)
14097 self.LogWarning("After the ipolicy change the following instances"
14098 " violate them: %s",
14099 utils.CommaJoin(violations))
14101 def BuildHooksEnv(self):
14102 """Build hooks env.
14106 "GROUP_NAME": self.op.group_name,
14107 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14110 def BuildHooksNodes(self):
14111 """Build hooks nodes.
14114 mn = self.cfg.GetMasterNode()
14115 return ([mn], [mn])
14117 def Exec(self, feedback_fn):
14118 """Modifies the node group.
14123 if self.op.ndparams:
14124 self.group.ndparams = self.new_ndparams
14125 result.append(("ndparams", str(self.group.ndparams)))
14127 if self.op.diskparams:
14128 self.group.diskparams = self.new_diskparams
14129 result.append(("diskparams", str(self.group.diskparams)))
14131 if self.op.alloc_policy:
14132 self.group.alloc_policy = self.op.alloc_policy
14134 if self.op.hv_state:
14135 self.group.hv_state_static = self.new_hv_state
14137 if self.op.disk_state:
14138 self.group.disk_state_static = self.new_disk_state
14140 if self.op.ipolicy:
14141 self.group.ipolicy = self.new_ipolicy
14143 self.cfg.Update(self.group, feedback_fn)
14147 class LUGroupRemove(LogicalUnit):
14148 HPATH = "group-remove"
14149 HTYPE = constants.HTYPE_GROUP
14152 def ExpandNames(self):
14153 # This will raises errors.OpPrereqError on its own:
14154 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14155 self.needed_locks = {
14156 locking.LEVEL_NODEGROUP: [self.group_uuid],
14159 def CheckPrereq(self):
14160 """Check prerequisites.
14162 This checks that the given group name exists as a node group, that is
14163 empty (i.e., contains no nodes), and that is not the last group of the
14167 # Verify that the group is empty.
14168 group_nodes = [node.name
14169 for node in self.cfg.GetAllNodesInfo().values()
14170 if node.group == self.group_uuid]
14173 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14175 (self.op.group_name,
14176 utils.CommaJoin(utils.NiceSort(group_nodes))),
14177 errors.ECODE_STATE)
14179 # Verify the cluster would not be left group-less.
14180 if len(self.cfg.GetNodeGroupList()) == 1:
14181 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14182 " removed" % self.op.group_name,
14183 errors.ECODE_STATE)
14185 def BuildHooksEnv(self):
14186 """Build hooks env.
14190 "GROUP_NAME": self.op.group_name,
14193 def BuildHooksNodes(self):
14194 """Build hooks nodes.
14197 mn = self.cfg.GetMasterNode()
14198 return ([mn], [mn])
14200 def Exec(self, feedback_fn):
14201 """Remove the node group.
14205 self.cfg.RemoveNodeGroup(self.group_uuid)
14206 except errors.ConfigurationError:
14207 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14208 (self.op.group_name, self.group_uuid))
14210 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14213 class LUGroupRename(LogicalUnit):
14214 HPATH = "group-rename"
14215 HTYPE = constants.HTYPE_GROUP
14218 def ExpandNames(self):
14219 # This raises errors.OpPrereqError on its own:
14220 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14222 self.needed_locks = {
14223 locking.LEVEL_NODEGROUP: [self.group_uuid],
14226 def CheckPrereq(self):
14227 """Check prerequisites.
14229 Ensures requested new name is not yet used.
14233 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14234 except errors.OpPrereqError:
14237 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14238 " node group (UUID: %s)" %
14239 (self.op.new_name, new_name_uuid),
14240 errors.ECODE_EXISTS)
14242 def BuildHooksEnv(self):
14243 """Build hooks env.
14247 "OLD_NAME": self.op.group_name,
14248 "NEW_NAME": self.op.new_name,
14251 def BuildHooksNodes(self):
14252 """Build hooks nodes.
14255 mn = self.cfg.GetMasterNode()
14257 all_nodes = self.cfg.GetAllNodesInfo()
14258 all_nodes.pop(mn, None)
14261 run_nodes.extend(node.name for node in all_nodes.values()
14262 if node.group == self.group_uuid)
14264 return (run_nodes, run_nodes)
14266 def Exec(self, feedback_fn):
14267 """Rename the node group.
14270 group = self.cfg.GetNodeGroup(self.group_uuid)
14273 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14274 (self.op.group_name, self.group_uuid))
14276 group.name = self.op.new_name
14277 self.cfg.Update(group, feedback_fn)
14279 return self.op.new_name
14282 class LUGroupEvacuate(LogicalUnit):
14283 HPATH = "group-evacuate"
14284 HTYPE = constants.HTYPE_GROUP
14287 def ExpandNames(self):
14288 # This raises errors.OpPrereqError on its own:
14289 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14291 if self.op.target_groups:
14292 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14293 self.op.target_groups)
14295 self.req_target_uuids = []
14297 if self.group_uuid in self.req_target_uuids:
14298 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14299 " as a target group (targets are %s)" %
14301 utils.CommaJoin(self.req_target_uuids)),
14302 errors.ECODE_INVAL)
14304 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14306 self.share_locks = _ShareAll()
14307 self.needed_locks = {
14308 locking.LEVEL_INSTANCE: [],
14309 locking.LEVEL_NODEGROUP: [],
14310 locking.LEVEL_NODE: [],
14313 def DeclareLocks(self, level):
14314 if level == locking.LEVEL_INSTANCE:
14315 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14317 # Lock instances optimistically, needs verification once node and group
14318 # locks have been acquired
14319 self.needed_locks[locking.LEVEL_INSTANCE] = \
14320 self.cfg.GetNodeGroupInstances(self.group_uuid)
14322 elif level == locking.LEVEL_NODEGROUP:
14323 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14325 if self.req_target_uuids:
14326 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14328 # Lock all groups used by instances optimistically; this requires going
14329 # via the node before it's locked, requiring verification later on
14330 lock_groups.update(group_uuid
14331 for instance_name in
14332 self.owned_locks(locking.LEVEL_INSTANCE)
14334 self.cfg.GetInstanceNodeGroups(instance_name))
14336 # No target groups, need to lock all of them
14337 lock_groups = locking.ALL_SET
14339 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14341 elif level == locking.LEVEL_NODE:
14342 # This will only lock the nodes in the group to be evacuated which
14343 # contain actual instances
14344 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14345 self._LockInstancesNodes()
14347 # Lock all nodes in group to be evacuated and target groups
14348 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14349 assert self.group_uuid in owned_groups
14350 member_nodes = [node_name
14351 for group in owned_groups
14352 for node_name in self.cfg.GetNodeGroup(group).members]
14353 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14355 def CheckPrereq(self):
14356 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14357 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14358 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14360 assert owned_groups.issuperset(self.req_target_uuids)
14361 assert self.group_uuid in owned_groups
14363 # Check if locked instances are still correct
14364 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14366 # Get instance information
14367 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14369 # Check if node groups for locked instances are still correct
14370 _CheckInstancesNodeGroups(self.cfg, self.instances,
14371 owned_groups, owned_nodes, self.group_uuid)
14373 if self.req_target_uuids:
14374 # User requested specific target groups
14375 self.target_uuids = self.req_target_uuids
14377 # All groups except the one to be evacuated are potential targets
14378 self.target_uuids = [group_uuid for group_uuid in owned_groups
14379 if group_uuid != self.group_uuid]
14381 if not self.target_uuids:
14382 raise errors.OpPrereqError("There are no possible target groups",
14383 errors.ECODE_INVAL)
14385 def BuildHooksEnv(self):
14386 """Build hooks env.
14390 "GROUP_NAME": self.op.group_name,
14391 "TARGET_GROUPS": " ".join(self.target_uuids),
14394 def BuildHooksNodes(self):
14395 """Build hooks nodes.
14398 mn = self.cfg.GetMasterNode()
14400 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14402 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14404 return (run_nodes, run_nodes)
14406 def Exec(self, feedback_fn):
14407 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14409 assert self.group_uuid not in self.target_uuids
14411 req = iallocator.IAReqGroupChange(instances=instances,
14412 target_groups=self.target_uuids)
14413 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14415 ial.Run(self.op.iallocator)
14417 if not ial.success:
14418 raise errors.OpPrereqError("Can't compute group evacuation using"
14419 " iallocator '%s': %s" %
14420 (self.op.iallocator, ial.info),
14421 errors.ECODE_NORES)
14423 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14425 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14426 len(jobs), self.op.group_name)
14428 return ResultWithJobs(jobs)
14431 class TagsLU(NoHooksLU): # pylint: disable=W0223
14432 """Generic tags LU.
14434 This is an abstract class which is the parent of all the other tags LUs.
14437 def ExpandNames(self):
14438 self.group_uuid = None
14439 self.needed_locks = {}
14441 if self.op.kind == constants.TAG_NODE:
14442 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14443 lock_level = locking.LEVEL_NODE
14444 lock_name = self.op.name
14445 elif self.op.kind == constants.TAG_INSTANCE:
14446 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14447 lock_level = locking.LEVEL_INSTANCE
14448 lock_name = self.op.name
14449 elif self.op.kind == constants.TAG_NODEGROUP:
14450 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14451 lock_level = locking.LEVEL_NODEGROUP
14452 lock_name = self.group_uuid
14457 if lock_level and getattr(self.op, "use_locking", True):
14458 self.needed_locks[lock_level] = lock_name
14460 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14461 # not possible to acquire the BGL based on opcode parameters)
14463 def CheckPrereq(self):
14464 """Check prerequisites.
14467 if self.op.kind == constants.TAG_CLUSTER:
14468 self.target = self.cfg.GetClusterInfo()
14469 elif self.op.kind == constants.TAG_NODE:
14470 self.target = self.cfg.GetNodeInfo(self.op.name)
14471 elif self.op.kind == constants.TAG_INSTANCE:
14472 self.target = self.cfg.GetInstanceInfo(self.op.name)
14473 elif self.op.kind == constants.TAG_NODEGROUP:
14474 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14476 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14477 str(self.op.kind), errors.ECODE_INVAL)
14480 class LUTagsGet(TagsLU):
14481 """Returns the tags of a given object.
14486 def ExpandNames(self):
14487 TagsLU.ExpandNames(self)
14489 # Share locks as this is only a read operation
14490 self.share_locks = _ShareAll()
14492 def Exec(self, feedback_fn):
14493 """Returns the tag list.
14496 return list(self.target.GetTags())
14499 class LUTagsSearch(NoHooksLU):
14500 """Searches the tags for a given pattern.
14505 def ExpandNames(self):
14506 self.needed_locks = {}
14508 def CheckPrereq(self):
14509 """Check prerequisites.
14511 This checks the pattern passed for validity by compiling it.
14515 self.re = re.compile(self.op.pattern)
14516 except re.error, err:
14517 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14518 (self.op.pattern, err), errors.ECODE_INVAL)
14520 def Exec(self, feedback_fn):
14521 """Returns the tag list.
14525 tgts = [("/cluster", cfg.GetClusterInfo())]
14526 ilist = cfg.GetAllInstancesInfo().values()
14527 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14528 nlist = cfg.GetAllNodesInfo().values()
14529 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14530 tgts.extend(("/nodegroup/%s" % n.name, n)
14531 for n in cfg.GetAllNodeGroupsInfo().values())
14533 for path, target in tgts:
14534 for tag in target.GetTags():
14535 if self.re.search(tag):
14536 results.append((path, tag))
14540 class LUTagsSet(TagsLU):
14541 """Sets a tag on a given object.
14546 def CheckPrereq(self):
14547 """Check prerequisites.
14549 This checks the type and length of the tag name and value.
14552 TagsLU.CheckPrereq(self)
14553 for tag in self.op.tags:
14554 objects.TaggableObject.ValidateTag(tag)
14556 def Exec(self, feedback_fn):
14561 for tag in self.op.tags:
14562 self.target.AddTag(tag)
14563 except errors.TagError, err:
14564 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14565 self.cfg.Update(self.target, feedback_fn)
14568 class LUTagsDel(TagsLU):
14569 """Delete a list of tags from a given object.
14574 def CheckPrereq(self):
14575 """Check prerequisites.
14577 This checks that we have the given tag.
14580 TagsLU.CheckPrereq(self)
14581 for tag in self.op.tags:
14582 objects.TaggableObject.ValidateTag(tag)
14583 del_tags = frozenset(self.op.tags)
14584 cur_tags = self.target.GetTags()
14586 diff_tags = del_tags - cur_tags
14588 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14589 raise errors.OpPrereqError("Tag(s) %s not found" %
14590 (utils.CommaJoin(diff_names), ),
14591 errors.ECODE_NOENT)
14593 def Exec(self, feedback_fn):
14594 """Remove the tag from the object.
14597 for tag in self.op.tags:
14598 self.target.RemoveTag(tag)
14599 self.cfg.Update(self.target, feedback_fn)
14602 class LUTestDelay(NoHooksLU):
14603 """Sleep for a specified amount of time.
14605 This LU sleeps on the master and/or nodes for a specified amount of
14611 def ExpandNames(self):
14612 """Expand names and set required locks.
14614 This expands the node list, if any.
14617 self.needed_locks = {}
14618 if self.op.on_nodes:
14619 # _GetWantedNodes can be used here, but is not always appropriate to use
14620 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14621 # more information.
14622 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14623 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14625 def _TestDelay(self):
14626 """Do the actual sleep.
14629 if self.op.on_master:
14630 if not utils.TestDelay(self.op.duration):
14631 raise errors.OpExecError("Error during master delay test")
14632 if self.op.on_nodes:
14633 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14634 for node, node_result in result.items():
14635 node_result.Raise("Failure during rpc call to node %s" % node)
14637 def Exec(self, feedback_fn):
14638 """Execute the test delay opcode, with the wanted repetitions.
14641 if self.op.repeat == 0:
14644 top_value = self.op.repeat - 1
14645 for i in range(self.op.repeat):
14646 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14650 class LUTestJqueue(NoHooksLU):
14651 """Utility LU to test some aspects of the job queue.
14656 # Must be lower than default timeout for WaitForJobChange to see whether it
14657 # notices changed jobs
14658 _CLIENT_CONNECT_TIMEOUT = 20.0
14659 _CLIENT_CONFIRM_TIMEOUT = 60.0
14662 def _NotifyUsingSocket(cls, cb, errcls):
14663 """Opens a Unix socket and waits for another program to connect.
14666 @param cb: Callback to send socket name to client
14667 @type errcls: class
14668 @param errcls: Exception class to use for errors
14671 # Using a temporary directory as there's no easy way to create temporary
14672 # sockets without writing a custom loop around tempfile.mktemp and
14674 tmpdir = tempfile.mkdtemp()
14676 tmpsock = utils.PathJoin(tmpdir, "sock")
14678 logging.debug("Creating temporary socket at %s", tmpsock)
14679 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14684 # Send details to client
14687 # Wait for client to connect before continuing
14688 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14690 (conn, _) = sock.accept()
14691 except socket.error, err:
14692 raise errcls("Client didn't connect in time (%s)" % err)
14696 # Remove as soon as client is connected
14697 shutil.rmtree(tmpdir)
14699 # Wait for client to close
14702 # pylint: disable=E1101
14703 # Instance of '_socketobject' has no ... member
14704 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14706 except socket.error, err:
14707 raise errcls("Client failed to confirm notification (%s)" % err)
14711 def _SendNotification(self, test, arg, sockname):
14712 """Sends a notification to the client.
14715 @param test: Test name
14716 @param arg: Test argument (depends on test)
14717 @type sockname: string
14718 @param sockname: Socket path
14721 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14723 def _Notify(self, prereq, test, arg):
14724 """Notifies the client of a test.
14727 @param prereq: Whether this is a prereq-phase test
14729 @param test: Test name
14730 @param arg: Test argument (depends on test)
14734 errcls = errors.OpPrereqError
14736 errcls = errors.OpExecError
14738 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14742 def CheckArguments(self):
14743 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14744 self.expandnames_calls = 0
14746 def ExpandNames(self):
14747 checkargs_calls = getattr(self, "checkargs_calls", 0)
14748 if checkargs_calls < 1:
14749 raise errors.ProgrammerError("CheckArguments was not called")
14751 self.expandnames_calls += 1
14753 if self.op.notify_waitlock:
14754 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14756 self.LogInfo("Expanding names")
14758 # Get lock on master node (just to get a lock, not for a particular reason)
14759 self.needed_locks = {
14760 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14763 def Exec(self, feedback_fn):
14764 if self.expandnames_calls < 1:
14765 raise errors.ProgrammerError("ExpandNames was not called")
14767 if self.op.notify_exec:
14768 self._Notify(False, constants.JQT_EXEC, None)
14770 self.LogInfo("Executing")
14772 if self.op.log_messages:
14773 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14774 for idx, msg in enumerate(self.op.log_messages):
14775 self.LogInfo("Sending log message %s", idx + 1)
14776 feedback_fn(constants.JQT_MSGPREFIX + msg)
14777 # Report how many test messages have been sent
14778 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14781 raise errors.OpExecError("Opcode failure was requested")
14786 class LUTestAllocator(NoHooksLU):
14787 """Run allocator tests.
14789 This LU runs the allocator tests
14792 def CheckPrereq(self):
14793 """Check prerequisites.
14795 This checks the opcode parameters depending on the director and mode test.
14798 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
14799 constants.IALLOCATOR_MODE_MULTI_ALLOC):
14800 for attr in ["memory", "disks", "disk_template",
14801 "os", "tags", "nics", "vcpus"]:
14802 if not hasattr(self.op, attr):
14803 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14804 attr, errors.ECODE_INVAL)
14805 iname = self.cfg.ExpandInstanceName(self.op.name)
14806 if iname is not None:
14807 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14808 iname, errors.ECODE_EXISTS)
14809 if not isinstance(self.op.nics, list):
14810 raise errors.OpPrereqError("Invalid parameter 'nics'",
14811 errors.ECODE_INVAL)
14812 if not isinstance(self.op.disks, list):
14813 raise errors.OpPrereqError("Invalid parameter 'disks'",
14814 errors.ECODE_INVAL)
14815 for row in self.op.disks:
14816 if (not isinstance(row, dict) or
14817 constants.IDISK_SIZE not in row or
14818 not isinstance(row[constants.IDISK_SIZE], int) or
14819 constants.IDISK_MODE not in row or
14820 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14821 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14822 " parameter", errors.ECODE_INVAL)
14823 if self.op.hypervisor is None:
14824 self.op.hypervisor = self.cfg.GetHypervisorType()
14825 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14826 fname = _ExpandInstanceName(self.cfg, self.op.name)
14827 self.op.name = fname
14828 self.relocate_from = \
14829 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14830 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14831 constants.IALLOCATOR_MODE_NODE_EVAC):
14832 if not self.op.instances:
14833 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14834 self.op.instances = _GetWantedInstances(self, self.op.instances)
14836 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14837 self.op.mode, errors.ECODE_INVAL)
14839 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14840 if self.op.allocator is None:
14841 raise errors.OpPrereqError("Missing allocator name",
14842 errors.ECODE_INVAL)
14843 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14844 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14845 self.op.direction, errors.ECODE_INVAL)
14847 def Exec(self, feedback_fn):
14848 """Run the allocator test.
14851 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14852 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
14853 memory=self.op.memory,
14854 disks=self.op.disks,
14855 disk_template=self.op.disk_template,
14859 vcpus=self.op.vcpus,
14860 spindle_use=self.op.spindle_use,
14861 hypervisor=self.op.hypervisor)
14862 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14863 req = iallocator.IAReqRelocate(name=self.op.name,
14864 relocate_from=list(self.relocate_from))
14865 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14866 req = iallocator.IAReqGroupChange(instances=self.op.instances,
14867 target_groups=self.op.target_groups)
14868 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14869 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
14870 evac_mode=self.op.evac_mode)
14871 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
14872 disk_template = self.op.disk_template
14873 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
14874 memory=self.op.memory,
14875 disks=self.op.disks,
14876 disk_template=disk_template,
14880 vcpus=self.op.vcpus,
14881 spindle_use=self.op.spindle_use,
14882 hypervisor=self.op.hypervisor)
14883 for idx in range(self.op.count)]
14884 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
14886 raise errors.ProgrammerError("Uncatched mode %s in"
14887 " LUTestAllocator.Exec", self.op.mode)
14889 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14890 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14891 result = ial.in_text
14893 ial.Run(self.op.allocator, validate=False)
14894 result = ial.out_text
14898 #: Query type implementations
14900 constants.QR_CLUSTER: _ClusterQuery,
14901 constants.QR_INSTANCE: _InstanceQuery,
14902 constants.QR_NODE: _NodeQuery,
14903 constants.QR_GROUP: _GroupQuery,
14904 constants.QR_OS: _OsQuery,
14905 constants.QR_EXPORT: _ExportQuery,
14908 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14911 def _GetQueryImplementation(name):
14912 """Returns the implemtnation for a query type.
14914 @param name: Query type, must be one of L{constants.QR_VIA_OP}
14918 return _QUERY_IMPL[name]
14920 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14921 errors.ECODE_INVAL)