4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti.masterd import iallocator
64 import ganeti.masterd.instance # pylint: disable=W0611
68 INSTANCE_DOWN = [constants.ADMINST_DOWN]
69 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
70 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
72 #: Instance status in which an instance can be marked as offline/online
73 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
74 constants.ADMINST_OFFLINE,
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensuring the
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separately is better because:
173 - ExpandNames is left as purely a lock-related function
174 - CheckPrereq is run after we have acquired locks (and possibly waited for them)
177 The function is allowed to change the self.op attribute so that
178 later methods need no longer worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that
197 level (note that in this case C{DeclareLocks} won't be called
198 at all for that level)
199 - if you need locks at a level, but you can't calculate it in
200 this function, initialise that level with an empty list and do
201 further processing in L{LogicalUnit.DeclareLocks} (see that
202 function's docstring)
203 - don't put anything for the BGL level
204 - if you want all locks at a level use L{locking.ALL_SET} as a value
206 If you need to share locks (rather than acquire them exclusively) at one
207 level you can modify self.share_locks, setting a true value (usually 1) for
208 that level. By default locks are not shared.
210 This function can also define a list of tasklets, which then will be
211 executed in order instead of the usual LU-level CheckPrereq and Exec
212 functions, if those are not defined by the LU.
216 # Acquire all nodes and one instance
217 self.needed_locks = {
218 locking.LEVEL_NODE: locking.ALL_SET,
219 locking.LEVEL_INSTANCE: ['instance1.example.com'],
221 # Acquire just two nodes
222 self.needed_locks = {
223 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
226 self.needed_locks = {} # No, you can't leave it to the default value None
229 # The implementation of this method is mandatory only if the new LU is
230 # concurrent, so that old LUs don't need to be changed all at the same time.
233 self.needed_locks = {} # Exclusive LUs don't need locks.
235 raise NotImplementedError
237 def DeclareLocks(self, level):
238 """Declare LU locking needs for a level
240 While most LUs can just declare their locking needs at ExpandNames time,
241 sometimes there's the need to calculate some locks after having acquired
242 the ones before. This function is called just before acquiring locks at a
243 particular level, but after acquiring the ones at lower levels, and permits
244 such calculations. It can be used to modify self.needed_locks, and by
245 default it does nothing.
247 This function is only called if you have something already set in
248 self.needed_locks for the level.
250 @param level: Locking level which is going to be locked
251 @type level: member of L{ganeti.locking.LEVELS}
255 def CheckPrereq(self):
256 """Check prerequisites for this LU.
258 This method should check that the prerequisites for the execution
259 of this LU are fulfilled. It can do internode communication, but
260 it should be idempotent - no cluster or system changes are allowed.
263 The method should raise errors.OpPrereqError in case something is
264 not fulfilled. Its return value is ignored.
266 This method should also update all the parameters of the opcode to
267 their canonical form if it hasn't been done by ExpandNames before.
270 if self.tasklets is not None:
271 for (idx, tl) in enumerate(self.tasklets):
272 logging.debug("Checking prerequisites for tasklet %s/%s",
273 idx + 1, len(self.tasklets))
278 def Exec(self, feedback_fn):
281 This method should implement the actual work. It should raise
282 errors.OpExecError for failures that are somewhat dealt with in
286 if self.tasklets is not None:
287 for (idx, tl) in enumerate(self.tasklets):
288 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
291 raise NotImplementedError
293 def BuildHooksEnv(self):
294 """Build hooks environment for this LU.
297 @return: Dictionary containing the environment that will be used for
298 running the hooks for this LU. The keys of the dict must not be prefixed
299 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
300 will extend the environment with additional variables. If no environment
301 should be defined, an empty dictionary should be returned (not C{None}).
302 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
306 raise NotImplementedError
308 def BuildHooksNodes(self):
309 """Build list of nodes to run LU's hooks.
311 @rtype: tuple; (list, list)
312 @return: Tuple containing a list of node names on which the hook
313 should run before the execution and a list of node names on which the
314 hook should run after the execution. If there are no nodes, an empty
315 list should be returned (and not None).
316 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
320 raise NotImplementedError
322 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
323 """Notify the LU about the results of its hooks.
325 This method is called every time a hooks phase is executed, and notifies
326 the Logical Unit about the hooks' result. The LU can then use it to alter
327 its result based on the hooks. By default the method does nothing and the
328 previous result is passed back unchanged but any LU can define it if it
329 wants to use the local cluster hook-scripts somehow.
331 @param phase: one of L{constants.HOOKS_PHASE_POST} or
332 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
333 @param hook_results: the results of the multi-node hooks rpc call
334 @param feedback_fn: function used to send feedback back to the caller
335 @param lu_result: the previous Exec result this LU had, or None
337 @return: the new Exec result, based on the previous result
341 # API must be kept, thus we ignore the "unused argument" and "could
342 # be a function" warnings
343 # pylint: disable=W0613,R0201
346 def _ExpandAndLockInstance(self):
347 """Helper function to expand and lock an instance.
349 Many LUs that work on an instance take its name in self.op.instance_name
350 and need to expand it and then declare the expanded name for locking. This
351 function does it, and then updates self.op.instance_name to the expanded
352 name. It also initializes needed_locks as a dict, if this hasn't been done
356 if self.needed_locks is None:
357 self.needed_locks = {}
359 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
360 "_ExpandAndLockInstance called with instance-level locks set"
361 self.op.instance_name = _ExpandInstanceName(self.cfg,
362 self.op.instance_name)
363 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
365 def _LockInstancesNodes(self, primary_only=False,
366 level=locking.LEVEL_NODE):
367 """Helper function to declare instances' nodes for locking.
369 This function should be called after locking one or more instances to lock
370 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
371 with all primary or secondary nodes for instances already locked and
372 present in self.needed_locks[locking.LEVEL_INSTANCE].
374 It should be called from DeclareLocks, and for safety only works if
375 self.recalculate_locks[locking.LEVEL_NODE] is set.
377 In the future it may grow parameters to just lock some instance's nodes, or
378 to just lock primaries or secondary nodes, if needed.
380 It should be called in DeclareLocks in a way similar to::
382 if level == locking.LEVEL_NODE:
383 self._LockInstancesNodes()
385 @type primary_only: boolean
386 @param primary_only: only lock primary nodes of locked instances
387 @param level: Which lock level to use for locking nodes
390 assert level in self.recalculate_locks, \
391 "_LockInstancesNodes helper function called with no nodes to recalculate"
393 # TODO: check if we've really been called with the instance locks held
395 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
396 # future we might want to have different behaviors depending on the value
397 # of self.recalculate_locks[locking.LEVEL_NODE]
399 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
400 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
401 wanted_nodes.append(instance.primary_node)
403 wanted_nodes.extend(instance.secondary_nodes)
405 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
406 self.needed_locks[level] = wanted_nodes
407 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
408 self.needed_locks[level].extend(wanted_nodes)
410 raise errors.ProgrammerError("Unknown recalculation mode")
412 del self.recalculate_locks[level]
415 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
416 """Simple LU which runs no hooks.
418 This LU is intended as a parent for other LogicalUnits which will
419 run no hooks, in order to reduce duplicate code.
425 def BuildHooksEnv(self):
426 """Empty BuildHooksEnv for NoHooksLu.
428 This just raises an error.
431 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
433 def BuildHooksNodes(self):
434 """Empty BuildHooksNodes for NoHooksLU.
437 raise AssertionError("BuildHooksNodes called for NoHooksLU")
441 """Tasklet base class.
443 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
444 they can mix legacy code with tasklets. Locking needs to be done in the LU,
445 tasklets know nothing about locks.
447 Subclasses must follow these rules:
448 - Implement CheckPrereq
452 def __init__(self, lu):
459 def CheckPrereq(self):
460 """Check prerequisites for this tasklets.
462 This method should check whether the prerequisites for the execution of
463 this tasklet are fulfilled. It can do internode communication, but it
464 should be idempotent - no cluster or system changes are allowed.
466 The method should raise errors.OpPrereqError in case something is not
467 fulfilled. Its return value is ignored.
469 This method should also update all parameters to their canonical form if it
470 hasn't been done before.
475 def Exec(self, feedback_fn):
476 """Execute the tasklet.
478 This method should implement the actual work. It should raise
479 errors.OpExecError for failures that are somewhat dealt with in code, or
483 raise NotImplementedError
487 """Base for query utility classes.
490 #: Attribute holding field definitions
496 def __init__(self, qfilter, fields, use_locking):
497 """Initializes this class.
500 self.use_locking = use_locking
502 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
503 namefield=self.SORT_FIELD)
504 self.requested_data = self.query.RequestedData()
505 self.names = self.query.RequestedNames()
507 # Sort only if no names were requested
508 self.sort_by_name = not self.names
510 self.do_locking = None
513 def _GetNames(self, lu, all_names, lock_level):
514 """Helper function to determine names asked for in the query.
518 names = lu.owned_locks(lock_level)
522 if self.wanted == locking.ALL_SET:
523 assert not self.names
524 # caller didn't specify names, so ordering is not important
525 return utils.NiceSort(names)
527 # caller specified names and we must keep the same order
529 assert not self.do_locking or lu.glm.is_owned(lock_level)
531 missing = set(self.wanted).difference(names)
533 raise errors.OpExecError("Some items were removed before retrieving"
534 " their data: %s" % missing)
536 # Return expanded names
539 def ExpandNames(self, lu):
540 """Expand names for this query.
542 See L{LogicalUnit.ExpandNames}.
545 raise NotImplementedError()
547 def DeclareLocks(self, lu, level):
548 """Declare locks for this query.
550 See L{LogicalUnit.DeclareLocks}.
553 raise NotImplementedError()
555 def _GetQueryData(self, lu):
556 """Collects all data for this query.
558 @return: Query data object
561 raise NotImplementedError()
563 def NewStyleQuery(self, lu):
564 """Collect data and execute query.
567 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
568 sort_by_name=self.sort_by_name)
570 def OldStyleQuery(self, lu):
571 """Collect data and execute query.
574 return self.query.OldStyleQuery(self._GetQueryData(lu),
575 sort_by_name=self.sort_by_name)
579 """Returns a dict declaring all lock levels shared.
582 return dict.fromkeys(locking.LEVELS, 1)
585 def _AnnotateDiskParams(instance, devs, cfg):
586 """Little helper wrapper to the rpc annotation method.
588 @param instance: The instance object
589 @type devs: List of L{objects.Disk}
590 @param devs: The root devices (not any of its children!)
591 @param cfg: The config object
592 @return: The annotated disk copies
593 @see: L{rpc.AnnotateDiskParams}
596 return rpc.AnnotateDiskParams(instance.disk_template, devs,
597 cfg.GetInstanceDiskParams(instance))
600 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
602 """Checks if node groups for locked instances are still correct.
604 @type cfg: L{config.ConfigWriter}
605 @param cfg: Cluster configuration
606 @type instances: dict; string as key, L{objects.Instance} as value
607 @param instances: Dictionary, instance name as key, instance object as value
608 @type owned_groups: iterable of string
609 @param owned_groups: List of owned groups
610 @type owned_nodes: iterable of string
611 @param owned_nodes: List of owned nodes
612 @type cur_group_uuid: string or None
613 @param cur_group_uuid: Optional group UUID to check against instance's groups
616 for (name, inst) in instances.items():
617 assert owned_nodes.issuperset(inst.all_nodes), \
618 "Instance %s's nodes changed while we kept the lock" % name
620 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
622 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
623 "Instance %s has no node in group %s" % (name, cur_group_uuid)
626 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
628 """Checks if the owned node groups are still correct for an instance.
630 @type cfg: L{config.ConfigWriter}
631 @param cfg: The cluster configuration
632 @type instance_name: string
633 @param instance_name: Instance name
634 @type owned_groups: set or frozenset
635 @param owned_groups: List of currently owned node groups
636 @type primary_only: boolean
637 @param primary_only: Whether to check node groups for only the primary node
640 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
642 if not owned_groups.issuperset(inst_groups):
643 raise errors.OpPrereqError("Instance %s's node groups changed since"
644 " locks were acquired, current groups are"
645 " are '%s', owning groups '%s'; retry the"
648 utils.CommaJoin(inst_groups),
649 utils.CommaJoin(owned_groups)),
655 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
656 """Checks if the instances in a node group are still correct.
658 @type cfg: L{config.ConfigWriter}
659 @param cfg: The cluster configuration
660 @type group_uuid: string
661 @param group_uuid: Node group UUID
662 @type owned_instances: set or frozenset
663 @param owned_instances: List of currently owned instances
666 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
667 if owned_instances != wanted_instances:
668 raise errors.OpPrereqError("Instances in node group '%s' changed since"
669 " locks were acquired, wanted '%s', have '%s';"
670 " retry the operation" %
672 utils.CommaJoin(wanted_instances),
673 utils.CommaJoin(owned_instances)),
676 return wanted_instances
679 def _SupportsOob(cfg, node):
680 """Tells if node supports OOB.
682 @type cfg: L{config.ConfigWriter}
683 @param cfg: The cluster configuration
684 @type node: L{objects.Node}
685 @param node: The node
686 @return: The OOB script if supported or an empty string otherwise
689 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
692 def _GetWantedNodes(lu, nodes):
693 """Returns list of checked and expanded node names.
695 @type lu: L{LogicalUnit}
696 @param lu: the logical unit on whose behalf we execute
698 @param nodes: list of node names or None for all nodes
700 @return: the list of nodes, sorted
701 @raise errors.ProgrammerError: if the nodes parameter is wrong type
705 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
707 return utils.NiceSort(lu.cfg.GetNodeList())
710 def _GetWantedInstances(lu, instances):
711 """Returns list of checked and expanded instance names.
713 @type lu: L{LogicalUnit}
714 @param lu: the logical unit on whose behalf we execute
715 @type instances: list
716 @param instances: list of instance names or None for all instances
718 @return: the list of instances, sorted
719 @raise errors.OpPrereqError: if the instances parameter is wrong type
720 @raise errors.OpPrereqError: if any of the passed instances is not found
724 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
726 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
730 def _GetUpdatedParams(old_params, update_dict,
731 use_default=True, use_none=False):
732 """Return the new version of a parameter dictionary.
734 @type old_params: dict
735 @param old_params: old parameters
736 @type update_dict: dict
737 @param update_dict: dict containing new parameter values, or
738 constants.VALUE_DEFAULT to reset the parameter to its default
740 @type use_default: boolean
741 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
742 values as 'to be deleted' values
743 @type use_none: boolean
744 @param use_none: whether to recognise C{None} values as 'to be
747 @return: the new parameter dictionary
750 params_copy = copy.deepcopy(old_params)
751 for key, val in update_dict.iteritems():
752 if ((use_default and val == constants.VALUE_DEFAULT) or
753 (use_none and val is None)):
759 params_copy[key] = val
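# Illustrative sketch (made-up parameter names and values): how callers use
# _GetUpdatedParams to merge opcode-supplied overrides into stored parameters.
# Passing constants.VALUE_DEFAULT for a key drops the override so the
# cluster-level default applies again.
def _ExampleGetUpdatedParamsUsage():
  """Example only; demonstrates the intended semantics of _GetUpdatedParams."""
  old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
  update = {"root_path": "/dev/vda2", "kernel_path": constants.VALUE_DEFAULT}
  # expected result (with use_default=True): {"root_path": "/dev/vda2"}
  return _GetUpdatedParams(old, update, use_default=True)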
763 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
764 """Return the new version of a instance policy.
766 @param group_policy: whether this policy applies to a group and thus
767 we should support removal of policy entries
770 use_none = use_default = group_policy
771 ipolicy = copy.deepcopy(old_ipolicy)
772 for key, value in new_ipolicy.items():
773 if key not in constants.IPOLICY_ALL_KEYS:
774 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
776 if key in constants.IPOLICY_ISPECS:
777 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
778 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
780 use_default=use_default)
782 if (not value or value == [constants.VALUE_DEFAULT] or
783 value == constants.VALUE_DEFAULT):
787 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
788 " on the cluster'" % key,
791 if key in constants.IPOLICY_PARAMETERS:
792 # FIXME: we assume all such values are float
794 ipolicy[key] = float(value)
795 except (TypeError, ValueError), err:
796 raise errors.OpPrereqError("Invalid value for attribute"
797 " '%s': '%s', error: %s" %
798 (key, value, err), errors.ECODE_INVAL)
800 # FIXME: we assume all others are lists; this should be redone
802 ipolicy[key] = list(value)
804 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
805 except errors.ConfigurationError, err:
806 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
811 def _UpdateAndVerifySubDict(base, updates, type_check):
812 """Updates and verifies a dict with sub dicts of the same type.
814 @param base: The dict with the old data
815 @param updates: The dict with the new data
816 @param type_check: Dict suitable to ForceDictType to verify correct types
817 @returns: A new dict with updated and verified values
821 new = _GetUpdatedParams(old, value)
822 utils.ForceDictType(new, type_check)
825 ret = copy.deepcopy(base)
826 ret.update(dict((key, fn(base.get(key, {}), value))
827 for key, value in updates.items()))
831 def _MergeAndVerifyHvState(op_input, obj_input):
832 """Combines the hv state from an opcode with the one of the object
834 @param op_input: The input dict from the opcode
835 @param obj_input: The input dict from the objects
836 @return: The verified and updated dict
840 invalid_hvs = set(op_input) - constants.HYPER_TYPES
842 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
843 " %s" % utils.CommaJoin(invalid_hvs),
845 if obj_input is None:
847 type_check = constants.HVSTS_PARAMETER_TYPES
848 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
853 def _MergeAndVerifyDiskState(op_input, obj_input):
854 """Combines the disk state from an opcode with the one of the object
856 @param op_input: The input dict from the opcode
857 @param obj_input: The input dict from the objects
858 @return: The verified and updated dict
861 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
863 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
864 utils.CommaJoin(invalid_dst),
866 type_check = constants.DSS_PARAMETER_TYPES
867 if obj_input is None:
869 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
871 for key, value in op_input.items())
876 def _ReleaseLocks(lu, level, names=None, keep=None):
877 """Releases locks owned by an LU.
879 @type lu: L{LogicalUnit}
880 @param level: Lock level
881 @type names: list or None
882 @param names: Names of locks to release
883 @type keep: list or None
884 @param keep: Names of locks to retain
887 assert not (keep is not None and names is not None), \
888 "Only one of the 'names' and the 'keep' parameters can be given"
890 if names is not None:
891 should_release = names.__contains__
893 should_release = lambda name: name not in keep
895 should_release = None
897 owned = lu.owned_locks(level)
899 # Not owning any lock at this level, do nothing
906 # Determine which locks to release
908 if should_release(name):
913 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
915 # Release just some locks
916 lu.glm.release(level, names=release)
918 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
921 lu.glm.release(level)
923 assert not lu.glm.is_owned(level), "No locks should be owned"
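# A minimal usage sketch (hypothetical LU with a "target_node" opcode slot):
# once a LU knows which node it will actually touch, the remaining node locks
# can be handed back early to reduce contention.
def _ExampleReleaseUnneededLocks(lu):
  """Example only: keep a single node lock and drop all instance locks."""
  # retain only the lock of the target node at the node level
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[lu.op.target_node])
  # release every lock held at the instance level
  _ReleaseLocks(lu, locking.LEVEL_INSTANCE)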
926 def _MapInstanceDisksToNodes(instances):
927 """Creates a map from (node, volume) to instance name.
929 @type instances: list of L{objects.Instance}
930 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
933 return dict(((node, vol), inst.name)
934 for inst in instances
935 for (node, vols) in inst.MapLVsByNode().items()
939 def _RunPostHook(lu, node_name):
940 """Runs the post-hook for an opcode on a single node.
943 hm = lu.proc.BuildHooksManager(lu)
945 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
946 except Exception, err: # pylint: disable=W0703
947 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
950 def _CheckOutputFields(static, dynamic, selected):
951 """Checks whether all selected fields are valid.
953 @type static: L{utils.FieldSet}
954 @param static: static fields set
955 @type dynamic: L{utils.FieldSet}
956 @param dynamic: dynamic fields set
963 delta = f.NonMatching(selected)
965 raise errors.OpPrereqError("Unknown output fields selected: %s"
966 % ",".join(delta), errors.ECODE_INVAL)
969 def _CheckGlobalHvParams(params):
970 """Validates that given hypervisor params are not global ones.
972 This will ensure that instances don't get customised versions of
976 used_globals = constants.HVC_GLOBALS.intersection(params)
978 msg = ("The following hypervisor parameters are global and cannot"
979 " be customized at instance level, please modify them at"
980 " cluster level: %s" % utils.CommaJoin(used_globals))
981 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
984 def _CheckNodeOnline(lu, node, msg=None):
985 """Ensure that a given node is online.
987 @param lu: the LU on behalf of which we make the check
988 @param node: the node to check
989 @param msg: if passed, should be a message to replace the default one
990 @raise errors.OpPrereqError: if the node is offline
994 msg = "Can't use offline node"
995 if lu.cfg.GetNodeInfo(node).offline:
996 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
999 def _CheckNodeNotDrained(lu, node):
1000 """Ensure that a given node is not drained.
1002 @param lu: the LU on behalf of which we make the check
1003 @param node: the node to check
1004 @raise errors.OpPrereqError: if the node is drained
1007 if lu.cfg.GetNodeInfo(node).drained:
1008 raise errors.OpPrereqError("Can't use drained node %s" % node,
1012 def _CheckNodeVmCapable(lu, node):
1013 """Ensure that a given node is vm capable.
1015 @param lu: the LU on behalf of which we make the check
1016 @param node: the node to check
1017 @raise errors.OpPrereqError: if the node is not vm capable
1020 if not lu.cfg.GetNodeInfo(node).vm_capable:
1021 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1025 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1026 """Ensure that a node supports a given OS.
1028 @param lu: the LU on behalf of which we make the check
1029 @param node: the node to check
1030 @param os_name: the OS to query about
1031 @param force_variant: whether to ignore variant errors
1032 @raise errors.OpPrereqError: if the node does not support the OS
1035 result = lu.rpc.call_os_get(node, os_name)
1036 result.Raise("OS '%s' not in supported OS list for node %s" %
1038 prereq=True, ecode=errors.ECODE_INVAL)
1039 if not force_variant:
1040 _CheckOSVariant(result.payload, os_name)
1043 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1044 """Ensure that a node has the given secondary ip.
1046 @type lu: L{LogicalUnit}
1047 @param lu: the LU on behalf of which we make the check
1049 @param node: the node to check
1050 @type secondary_ip: string
1051 @param secondary_ip: the ip to check
1052 @type prereq: boolean
1053 @param prereq: whether to throw a prerequisite or an execute error
1054 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1055 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1058 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1059 result.Raise("Failure checking secondary ip on node %s" % node,
1060 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1061 if not result.payload:
1062 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1063 " please fix and re-run this command" % secondary_ip)
1065 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1067 raise errors.OpExecError(msg)
1070 def _GetClusterDomainSecret():
1071 """Reads the cluster domain secret.
1074 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1078 def _CheckInstanceState(lu, instance, req_states, msg=None):
1079 """Ensure that an instance is in one of the required states.
1081 @param lu: the LU on behalf of which we make the check
1082 @param instance: the instance to check
1083 @param msg: if passed, should be a message to replace the default one
1084 @raise errors.OpPrereqError: if the instance is not in the required state
1088 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1089 if instance.admin_state not in req_states:
1090 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1091 (instance.name, instance.admin_state, msg),
1094 if constants.ADMINST_UP not in req_states:
1095 pnode = instance.primary_node
1096 if not lu.cfg.GetNodeInfo(pnode).offline:
1097 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1098 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1099 prereq=True, ecode=errors.ECODE_ENVIRON)
1100 if instance.name in ins_l.payload:
1101 raise errors.OpPrereqError("Instance %s is running, %s" %
1102 (instance.name, msg), errors.ECODE_STATE)
1104 lu.LogWarning("Primary node offline, ignoring check that instance"
1108 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1109 """Computes if value is in the desired range.
1111 @param name: name of the parameter for which we perform the check
1112 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1114 @param ipolicy: dictionary containing min, max and std values
1115 @param value: actual value that we want to use
1116 @return: None or element not meeting the criteria
1120 if value in [None, constants.VALUE_AUTO]:
1122 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1123 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1124 if value > max_v or min_v > value:
1126 fqn = "%s/%s" % (name, qualifier)
1129 return ("%s value %s is not in range [%s, %s]" %
1130 (fqn, value, min_v, max_v))
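# Illustrative sketch (invented ipolicy numbers): with a 512-4096 MB memory
# range, checking 8192 returns a violation message, while a value inside the
# range (or None/"auto") returns None.
def _ExampleMinMaxSpecCheck():
  """Example only; exercises _ComputeMinMaxSpec with made-up limits."""
  ipolicy = {
    constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 512},
    constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096},
    }
  # returns a "... is not in range [512, 4096]" message string
  return _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "mem", ipolicy, 8192)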
1134 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1135 nic_count, disk_sizes, spindle_use,
1136 _compute_fn=_ComputeMinMaxSpec):
1137 """Verifies ipolicy against provided specs.
1140 @param ipolicy: The ipolicy
1142 @param mem_size: The memory size
1143 @type cpu_count: int
1144 @param cpu_count: Used cpu cores
1145 @type disk_count: int
1146 @param disk_count: Number of disks used
1147 @type nic_count: int
1148 @param nic_count: Number of nics used
1149 @type disk_sizes: list of ints
1150 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1151 @type spindle_use: int
1152 @param spindle_use: The number of spindles this instance uses
1153 @param _compute_fn: The compute function (unittest only)
1154 @return: A list of violations, or an empty list if no violations are found
1157 assert disk_count == len(disk_sizes)
1160 (constants.ISPEC_MEM_SIZE, "", mem_size),
1161 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1162 (constants.ISPEC_DISK_COUNT, "", disk_count),
1163 (constants.ISPEC_NIC_COUNT, "", nic_count),
1164 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1165 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1166 for idx, d in enumerate(disk_sizes)]
1169 (_compute_fn(name, qualifier, ipolicy, value)
1170 for (name, qualifier, value) in test_settings))
1173 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1174 _compute_fn=_ComputeIPolicySpecViolation):
1175 """Compute if instance meets the specs of ipolicy.
1178 @param ipolicy: The ipolicy to verify against
1179 @type instance: L{objects.Instance}
1180 @param instance: The instance to verify
1181 @param _compute_fn: The function to verify ipolicy (unittest only)
1182 @see: L{_ComputeIPolicySpecViolation}
1185 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1186 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1187 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1188 disk_count = len(instance.disks)
1189 disk_sizes = [disk.size for disk in instance.disks]
1190 nic_count = len(instance.nics)
1192 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1193 disk_sizes, spindle_use)
1196 def _ComputeIPolicyInstanceSpecViolation(
1197 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1198 """Compute if instance specs meets the specs of ipolicy.
1201 @param ipolicy: The ipolicy to verify against
1202 @type instance_spec: dict
1203 @param instance_spec: The instance spec to verify
1204 @param _compute_fn: The function to verify ipolicy (unittest only)
1205 @see: L{_ComputeIPolicySpecViolation}
1208 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1209 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1210 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1211 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1212 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1213 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1215 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1216 disk_sizes, spindle_use)
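# Illustrative sketch (hand-written spec, invented sizes): the spec dict uses
# the same ISPEC_* keys that the function above reads; the ipolicy argument is
# whatever policy the caller wants to validate against.
def _ExampleSpecViolationCheck(ipolicy):
  """Example only: validate a hand-written instance spec against an ipolicy."""
  spec = {
    constants.ISPEC_MEM_SIZE: 2048,
    constants.ISPEC_CPU_COUNT: 2,
    constants.ISPEC_DISK_COUNT: 1,
    constants.ISPEC_DISK_SIZE: [10240],
    constants.ISPEC_NIC_COUNT: 1,
    constants.ISPEC_SPINDLE_USE: 1,
    }
  # returns a list of human-readable violations (empty if the spec fits)
  return _ComputeIPolicyInstanceSpecViolation(ipolicy, spec)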
1219 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1221 _compute_fn=_ComputeIPolicyInstanceViolation):
1222 """Compute if instance meets the specs of the new target group.
1224 @param ipolicy: The ipolicy to verify
1225 @param instance: The instance object to verify
1226 @param current_group: The current group of the instance
1227 @param target_group: The new group of the instance
1228 @param _compute_fn: The function to verify ipolicy (unittest only)
1229 @see: L{_ComputeIPolicySpecViolation}
1232 if current_group == target_group:
1235 return _compute_fn(ipolicy, instance)
1238 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1239 _compute_fn=_ComputeIPolicyNodeViolation):
1240 """Checks that the target node is correct in terms of instance policy.
1242 @param ipolicy: The ipolicy to verify
1243 @param instance: The instance object to verify
1244 @param node: The new node to relocate
1245 @param ignore: Ignore violations of the ipolicy
1246 @param _compute_fn: The function to verify ipolicy (unittest only)
1247 @see: L{_ComputeIPolicySpecViolation}
1250 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1251 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1254 msg = ("Instance does not meet target node group's (%s) instance"
1255 " policy: %s") % (node.group, utils.CommaJoin(res))
1259 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1262 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1263 """Computes a set of any instances that would violate the new ipolicy.
1265 @param old_ipolicy: The current (still in-place) ipolicy
1266 @param new_ipolicy: The new (to become) ipolicy
1267 @param instances: List of instances to verify
1268 @return: A list of instances which violate the new ipolicy but
1272 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1273 _ComputeViolatingInstances(old_ipolicy, instances))
1276 def _ExpandItemName(fn, name, kind):
1277 """Expand an item name.
1279 @param fn: the function to use for expansion
1280 @param name: requested item name
1281 @param kind: text description ('Node' or 'Instance')
1282 @return: the resolved (full) name
1283 @raise errors.OpPrereqError: if the item is not found
1286 full_name = fn(name)
1287 if full_name is None:
1288 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1293 def _ExpandNodeName(cfg, name):
1294 """Wrapper over L{_ExpandItemName} for nodes."""
1295 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1298 def _ExpandInstanceName(cfg, name):
1299 """Wrapper over L{_ExpandItemName} for instance."""
1300 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1303 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1304 minmem, maxmem, vcpus, nics, disk_template, disks,
1305 bep, hvp, hypervisor_name, tags):
1306 """Builds instance related env variables for hooks
1308 This builds the hook environment from individual variables.
1311 @param name: the name of the instance
1312 @type primary_node: string
1313 @param primary_node: the name of the instance's primary node
1314 @type secondary_nodes: list
1315 @param secondary_nodes: list of secondary nodes as strings
1316 @type os_type: string
1317 @param os_type: the name of the instance's OS
1318 @type status: string
1319 @param status: the desired status of the instance
1320 @type minmem: string
1321 @param minmem: the minimum memory size of the instance
1322 @type maxmem: string
1323 @param maxmem: the maximum memory size of the instance
1325 @param vcpus: the count of VCPUs the instance has
1327 @param nics: list of tuples (ip, mac, mode, link) representing
1328 the NICs the instance has
1329 @type disk_template: string
1330 @param disk_template: the disk template of the instance
1332 @param disks: the list of (size, mode) pairs
1334 @param bep: the backend parameters for the instance
1336 @param hvp: the hypervisor parameters for the instance
1337 @type hypervisor_name: string
1338 @param hypervisor_name: the hypervisor for the instance
1340 @param tags: list of instance tags as strings
1342 @return: the hook environment for this instance
1347 "INSTANCE_NAME": name,
1348 "INSTANCE_PRIMARY": primary_node,
1349 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1350 "INSTANCE_OS_TYPE": os_type,
1351 "INSTANCE_STATUS": status,
1352 "INSTANCE_MINMEM": minmem,
1353 "INSTANCE_MAXMEM": maxmem,
1354 # TODO(2.7) remove deprecated "memory" value
1355 "INSTANCE_MEMORY": maxmem,
1356 "INSTANCE_VCPUS": vcpus,
1357 "INSTANCE_DISK_TEMPLATE": disk_template,
1358 "INSTANCE_HYPERVISOR": hypervisor_name,
1361 nic_count = len(nics)
1362 for idx, (ip, mac, mode, link) in enumerate(nics):
1365 env["INSTANCE_NIC%d_IP" % idx] = ip
1366 env["INSTANCE_NIC%d_MAC" % idx] = mac
1367 env["INSTANCE_NIC%d_MODE" % idx] = mode
1368 env["INSTANCE_NIC%d_LINK" % idx] = link
1369 if mode == constants.NIC_MODE_BRIDGED:
1370 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1374 env["INSTANCE_NIC_COUNT"] = nic_count
1377 disk_count = len(disks)
1378 for idx, (size, mode) in enumerate(disks):
1379 env["INSTANCE_DISK%d_SIZE" % idx] = size
1380 env["INSTANCE_DISK%d_MODE" % idx] = mode
1384 env["INSTANCE_DISK_COUNT"] = disk_count
1389 env["INSTANCE_TAGS"] = " ".join(tags)
1391 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1392 for key, value in source.items():
1393 env["INSTANCE_%s_%s" % (kind, key)] = value
1398 def _NICListToTuple(lu, nics):
1399 """Build a list of nic information tuples.
1401 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1402 value in LUInstanceQueryData.
1404 @type lu: L{LogicalUnit}
1405 @param lu: the logical unit on whose behalf we execute
1406 @type nics: list of L{objects.NIC}
1407 @param nics: list of nics to convert to hooks tuples
1411 cluster = lu.cfg.GetClusterInfo()
1415 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1416 mode = filled_params[constants.NIC_MODE]
1417 link = filled_params[constants.NIC_LINK]
1418 hooks_nics.append((ip, mac, mode, link))
1422 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1423 """Builds instance related env variables for hooks from an object.
1425 @type lu: L{LogicalUnit}
1426 @param lu: the logical unit on whose behalf we execute
1427 @type instance: L{objects.Instance}
1428 @param instance: the instance for which we should build the
1430 @type override: dict
1431 @param override: dictionary with key/values that will override
1434 @return: the hook environment dictionary
1437 cluster = lu.cfg.GetClusterInfo()
1438 bep = cluster.FillBE(instance)
1439 hvp = cluster.FillHV(instance)
1441 "name": instance.name,
1442 "primary_node": instance.primary_node,
1443 "secondary_nodes": instance.secondary_nodes,
1444 "os_type": instance.os,
1445 "status": instance.admin_state,
1446 "maxmem": bep[constants.BE_MAXMEM],
1447 "minmem": bep[constants.BE_MINMEM],
1448 "vcpus": bep[constants.BE_VCPUS],
1449 "nics": _NICListToTuple(lu, instance.nics),
1450 "disk_template": instance.disk_template,
1451 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1454 "hypervisor_name": instance.hypervisor,
1455 "tags": instance.tags,
1458 args.update(override)
1459 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
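# A shorter variant (the override value is hypothetical): the same environment
# built from an existing Instance object, with the reported status forced to
# the "admin down" state.
def _ExampleInstanceHookEnvByObject(lu, instance):
  """Example only; overrides one field of the auto-generated arguments."""
  return _BuildInstanceHookEnvByObject(
    lu, instance, override={"status": constants.ADMINST_DOWN})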
1462 def _AdjustCandidatePool(lu, exceptions):
1463 """Adjust the candidate pool after node operations.
1466 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1468 lu.LogInfo("Promoted nodes to master candidate role: %s",
1469 utils.CommaJoin(node.name for node in mod_list))
1470 for name in mod_list:
1471 lu.context.ReaddNode(name)
1472 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1474 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1478 def _DecideSelfPromotion(lu, exceptions=None):
1479 """Decide whether I should promote myself as a master candidate.
1482 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1483 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1484 # the new node will increase mc_max by one, so:
1485 mc_should = min(mc_should + 1, cp_size)
1486 return mc_now < mc_should
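# Worked example with invented numbers: if candidate_pool_size is 10 and
# GetMasterCandidateStats reports mc_now = 7 with mc_should = 8, then the
# target becomes min(8 + 1, 10) = 9; since 7 < 9, the new node should promote
# itself to master candidate.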
1489 def _ComputeViolatingInstances(ipolicy, instances):
1490 """Computes a set of instances who violates given ipolicy.
1492 @param ipolicy: The ipolicy to verify
1493 @type instances: list of L{objects.Instance}
1494 @param instances: List of instances to verify
1495 @return: A frozenset of instance names violating the ipolicy
1498 return frozenset([inst.name for inst in instances
1499 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1502 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1503 """Check that the brigdes needed by a list of nics exist.
1506 cluster = lu.cfg.GetClusterInfo()
1507 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1508 brlist = [params[constants.NIC_LINK] for params in paramslist
1509 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1511 result = lu.rpc.call_bridges_exist(target_node, brlist)
1512 result.Raise("Error checking bridges on destination node '%s'" %
1513 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1516 def _CheckInstanceBridgesExist(lu, instance, node=None):
1517 """Check that the brigdes needed by an instance exist.
1521 node = instance.primary_node
1522 _CheckNicsBridgesExist(lu, instance.nics, node)
1525 def _CheckOSVariant(os_obj, name):
1526 """Check whether an OS name conforms to the os variants specification.
1528 @type os_obj: L{objects.OS}
1529 @param os_obj: OS object to check
1531 @param name: OS name passed by the user, to check for validity
1534 variant = objects.OS.GetVariant(name)
1535 if not os_obj.supported_variants:
1537 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1538 " passed)" % (os_obj.name, variant),
1542 raise errors.OpPrereqError("OS name must include a variant",
1545 if variant not in os_obj.supported_variants:
1546 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1549 def _GetNodeInstancesInner(cfg, fn):
1550 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1553 def _GetNodeInstances(cfg, node_name):
1554 """Returns a list of all primary and secondary instances on a node.
1558 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1561 def _GetNodePrimaryInstances(cfg, node_name):
1562 """Returns primary instances on a node.
1565 return _GetNodeInstancesInner(cfg,
1566 lambda inst: node_name == inst.primary_node)
1569 def _GetNodeSecondaryInstances(cfg, node_name):
1570 """Returns secondary instances on a node.
1573 return _GetNodeInstancesInner(cfg,
1574 lambda inst: node_name in inst.secondary_nodes)
1577 def _GetStorageTypeArgs(cfg, storage_type):
1578 """Returns the arguments for a storage type.
1581 # Special case for file storage
1582 if storage_type == constants.ST_FILE:
1583 # storage.FileStorage wants a list of storage directories
1584 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1589 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1592 for dev in instance.disks:
1593 cfg.SetDiskID(dev, node_name)
1595 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1597 result.Raise("Failed to get disk status from node %s" % node_name,
1598 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1600 for idx, bdev_status in enumerate(result.payload):
1601 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1607 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1608 """Check the sanity of iallocator and node arguments and use the
1609 cluster-wide iallocator if appropriate.
1611 Check that at most one of (iallocator, node) is specified. If none is
1612 specified, then the LU's opcode's iallocator slot is filled with the
1613 cluster-wide default iallocator.
1615 @type iallocator_slot: string
1616 @param iallocator_slot: the name of the opcode iallocator slot
1617 @type node_slot: string
1618 @param node_slot: the name of the opcode target node slot
1621 node = getattr(lu.op, node_slot, None)
1622 ialloc = getattr(lu.op, iallocator_slot, None)
1624 if node is not None and ialloc is not None:
1625 raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1627 elif node is None and ialloc is None:
1628 default_iallocator = lu.cfg.GetDefaultIAllocator()
1629 if default_iallocator:
1630 setattr(lu.op, iallocator_slot, default_iallocator)
1632 raise errors.OpPrereqError("No iallocator or node given and no"
1633 " cluster-wide default iallocator found;"
1634 " please specify either an iallocator or a"
1635 " node, or set a cluster-wide default"
1636 " iallocator", errors.ECODE_INVAL)
1639 def _GetDefaultIAllocator(cfg, ialloc):
1640 """Decides on which iallocator to use.
1642 @type cfg: L{config.ConfigWriter}
1643 @param cfg: Cluster configuration object
1644 @type ialloc: string or None
1645 @param ialloc: Iallocator specified in opcode
1647 @return: Iallocator name
1651 # Use default iallocator
1652 ialloc = cfg.GetDefaultIAllocator()
1655 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1656 " opcode nor as a cluster-wide default",
1662 class LUClusterPostInit(LogicalUnit):
1663 """Logical unit for running hooks after cluster initialization.
1666 HPATH = "cluster-init"
1667 HTYPE = constants.HTYPE_CLUSTER
1669 def BuildHooksEnv(self):
1674 "OP_TARGET": self.cfg.GetClusterName(),
1677 def BuildHooksNodes(self):
1678 """Build hooks nodes.
1681 return ([], [self.cfg.GetMasterNode()])
1683 def Exec(self, feedback_fn):
1690 class LUClusterDestroy(LogicalUnit):
1691 """Logical unit for destroying the cluster.
1694 HPATH = "cluster-destroy"
1695 HTYPE = constants.HTYPE_CLUSTER
1697 def BuildHooksEnv(self):
1702 "OP_TARGET": self.cfg.GetClusterName(),
1705 def BuildHooksNodes(self):
1706 """Build hooks nodes.
1711 def CheckPrereq(self):
1712 """Check prerequisites.
1714 This checks whether the cluster is empty.
1716 Any errors are signaled by raising errors.OpPrereqError.
1719 master = self.cfg.GetMasterNode()
1721 nodelist = self.cfg.GetNodeList()
1722 if len(nodelist) != 1 or nodelist[0] != master:
1723 raise errors.OpPrereqError("There are still %d node(s) in"
1724 " this cluster." % (len(nodelist) - 1),
1726 instancelist = self.cfg.GetInstanceList()
1728 raise errors.OpPrereqError("There are still %d instance(s) in"
1729 " this cluster." % len(instancelist),
1732 def Exec(self, feedback_fn):
1733 """Destroys the cluster.
1736 master_params = self.cfg.GetMasterNetworkParameters()
1738 # Run post hooks on master node before it's removed
1739 _RunPostHook(self, master_params.name)
1741 ems = self.cfg.GetUseExternalMipScript()
1742 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1745 self.LogWarning("Error disabling the master IP address: %s",
1748 return master_params.name
1751 def _VerifyCertificate(filename):
1752 """Verifies a certificate for L{LUClusterVerifyConfig}.
1754 @type filename: string
1755 @param filename: Path to PEM file
1759 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1760 utils.ReadFile(filename))
1761 except Exception, err: # pylint: disable=W0703
1762 return (LUClusterVerifyConfig.ETYPE_ERROR,
1763 "Failed to load X509 certificate %s: %s" % (filename, err))
1766 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1767 constants.SSL_CERT_EXPIRATION_ERROR)
1770 fnamemsg = "While verifying %s: %s" % (filename, msg)
1775 return (None, fnamemsg)
1776 elif errcode == utils.CERT_WARNING:
1777 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1778 elif errcode == utils.CERT_ERROR:
1779 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1781 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1784 def _GetAllHypervisorParameters(cluster, instances):
1785 """Compute the set of all hypervisor parameters.
1787 @type cluster: L{objects.Cluster}
1788 @param cluster: the cluster object
1789 @type instances: list of L{objects.Instance}
1790 @param instances: additional instances from which to obtain parameters
1791 @rtype: list of (origin, hypervisor, parameters)
1792 @return: a list with all parameters found, indicating the hypervisor they
1793 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1798 for hv_name in cluster.enabled_hypervisors:
1799 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1801 for os_name, os_hvp in cluster.os_hvp.items():
1802 for hv_name, hv_params in os_hvp.items():
1804 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1805 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1807 # TODO: collapse identical parameter values in a single one
1808 for instance in instances:
1809 if instance.hvparams:
1810 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1811 cluster.FillHV(instance)))
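# Shape of the collected data (values invented for illustration):
#   [("cluster", "xen-pvm", {...cluster defaults...}),
#    ("os debian-8", "xen-pvm", {...OS overrides on top of the defaults...}),
#    ("instance inst1.example.com", "kvm", {...fully filled parameters...})]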
1816 class _VerifyErrors(object):
1817 """Mix-in for cluster/group verify LUs.
1819 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1820 self.op and self._feedback_fn to be available.)
1824 ETYPE_FIELD = "code"
1825 ETYPE_ERROR = "ERROR"
1826 ETYPE_WARNING = "WARNING"
1828 def _Error(self, ecode, item, msg, *args, **kwargs):
1829 """Format an error message.
1831 Based on the opcode's error_codes parameter, either format a
1832 parseable error code, or a simpler error string.
1834 This must be called only from Exec and functions called from Exec.
1837 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1838 itype, etxt, _ = ecode
1839 # first complete the msg
1842 # then format the whole message
1843 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1844 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1850 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1851 # and finally report it via the feedback_fn
1852 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1854 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1855 """Log an error message if the passed condition is True.
1859 or self.op.debug_simulate_errors) # pylint: disable=E1101
1861 # If the error code is in the list of ignored errors, demote the error to a
1863 (_, etxt, _) = ecode
1864 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1865 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1868 self._Error(ecode, *args, **kwargs)
1870 # do not mark the operation as failed for WARN cases only
1871 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1872 self.bad = self.bad or cond
1875 class LUClusterVerify(NoHooksLU):
1876 """Submits all jobs necessary to verify the cluster.
1881 def ExpandNames(self):
1882 self.needed_locks = {}
1884 def Exec(self, feedback_fn):
1887 if self.op.group_name:
1888 groups = [self.op.group_name]
1889 depends_fn = lambda: None
1891 groups = self.cfg.GetNodeGroupList()
1893 # Verify global configuration
1895 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1898 # Always depend on global verification
1899 depends_fn = lambda: [(-len(jobs), [])]
1902 [opcodes.OpClusterVerifyGroup(group_name=group,
1903 ignore_errors=self.op.ignore_errors,
1904 depends=depends_fn())]
1905 for group in groups)
1907 # Fix up all parameters
1908 for op in itertools.chain(*jobs): # pylint: disable=W0142
1909 op.debug_simulate_errors = self.op.debug_simulate_errors
1910 op.verbose = self.op.verbose
1911 op.error_codes = self.op.error_codes
1913 op.skip_checks = self.op.skip_checks
1914 except AttributeError:
1915 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1917 return ResultWithJobs(jobs)
1920 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1921 """Verifies the cluster config.
1926 def _VerifyHVP(self, hvp_data):
1927 """Verifies locally the syntax of the hypervisor parameters.
1930 for item, hv_name, hv_params in hvp_data:
1931 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1934 hv_class = hypervisor.GetHypervisor(hv_name)
1935 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1936 hv_class.CheckParameterSyntax(hv_params)
1937 except errors.GenericError, err:
1938 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1940 def ExpandNames(self):
1941 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1942 self.share_locks = _ShareAll()
1944 def CheckPrereq(self):
1945 """Check prerequisites.
1948 # Retrieve all information
1949 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1950 self.all_node_info = self.cfg.GetAllNodesInfo()
1951 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1953 def Exec(self, feedback_fn):
1954 """Verify integrity of cluster, performing various test on nodes.
1958 self._feedback_fn = feedback_fn
1960 feedback_fn("* Verifying cluster config")
1962 for msg in self.cfg.VerifyConfig():
1963 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1965 feedback_fn("* Verifying cluster certificate files")
1967 for cert_filename in constants.ALL_CERT_FILES:
1968 (errcode, msg) = _VerifyCertificate(cert_filename)
1969 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1971 feedback_fn("* Verifying hypervisor parameters")
1973 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1974 self.all_inst_info.values()))
1976 feedback_fn("* Verifying all nodes belong to an existing group")
1978 # We do this verification here because, should this bogus circumstance
1979 # occur, it would never be caught by VerifyGroup, which only acts on
1980 # nodes/instances reachable from existing node groups.
1982 dangling_nodes = set(node.name for node in self.all_node_info.values()
1983 if node.group not in self.all_group_info)
1985 dangling_instances = {}
1986 no_node_instances = []
1988 for inst in self.all_inst_info.values():
1989 if inst.primary_node in dangling_nodes:
1990 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1991 elif inst.primary_node not in self.all_node_info:
1992 no_node_instances.append(inst.name)
1997 utils.CommaJoin(dangling_instances.get(node.name,
1999 for node in dangling_nodes]
2001 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2003 "the following nodes (and their instances) belong to a non"
2004 " existing group: %s", utils.CommaJoin(pretty_dangling))
2006 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2008 "the following instances have a non-existing primary-node:"
2009 " %s", utils.CommaJoin(no_node_instances))
2014 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2015 """Verifies the status of a node group.
2018 HPATH = "cluster-verify"
2019 HTYPE = constants.HTYPE_CLUSTER
2022 _HOOKS_INDENT_RE = re.compile("^", re.M)
2024 class NodeImage(object):
2025 """A class representing the logical and physical status of a node.
2028 @ivar name: the node name to which this object refers
2029 @ivar volumes: a structure as returned from
2030 L{ganeti.backend.GetVolumeList} (runtime)
2031 @ivar instances: a list of running instances (runtime)
2032 @ivar pinst: list of configured primary instances (config)
2033 @ivar sinst: list of configured secondary instances (config)
2034 @ivar sbp: dictionary of {primary-node: list of instances} for all
2035 instances for which this node is secondary (config)
2036 @ivar mfree: free memory, as reported by hypervisor (runtime)
2037 @ivar dfree: free disk, as reported by the node (runtime)
2038 @ivar offline: the offline status (config)
2039 @type rpc_fail: boolean
2040 @ivar rpc_fail: whether the RPC verify call failed (overall,
2041 not whether the individual keys were correct) (runtime)
2042 @type lvm_fail: boolean
2043 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2044 @type hyp_fail: boolean
2045 @ivar hyp_fail: whether the RPC call didn't return the instance list
2046 @type ghost: boolean
2047 @ivar ghost: whether this is a known node or not (config)
2048 @type os_fail: boolean
2049 @ivar os_fail: whether the RPC call didn't return valid OS data
2051 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2052 @type vm_capable: boolean
2053 @ivar vm_capable: whether the node can host instances
2056 def __init__(self, offline=False, name=None, vm_capable=True):
2065 self.offline = offline
2066 self.vm_capable = vm_capable
2067 self.rpc_fail = False
2068 self.lvm_fail = False
2069 self.hyp_fail = False
2071 self.os_fail = False
2074 def ExpandNames(self):
2075 # This raises errors.OpPrereqError on its own:
2076 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2078 # Get instances in node group; this is unsafe and needs verification later
2080 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2082 self.needed_locks = {
2083 locking.LEVEL_INSTANCE: inst_names,
2084 locking.LEVEL_NODEGROUP: [self.group_uuid],
2085 locking.LEVEL_NODE: [],
2088 self.share_locks = _ShareAll()
2090 def DeclareLocks(self, level):
2091 if level == locking.LEVEL_NODE:
2092 # Get members of node group; this is unsafe and needs verification later
2093 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2095 all_inst_info = self.cfg.GetAllInstancesInfo()
2097 # In Exec(), we warn about mirrored instances that have primary and
2098 # secondary living in separate node groups. To fully verify that
2099 # volumes for these instances are healthy, we will need to do an
2100 # extra call to their secondaries. We ensure here those nodes will
2102 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2103 # Important: access only the instances whose lock is owned
2104 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2105 nodes.update(all_inst_info[inst].secondary_nodes)
2107 self.needed_locks[locking.LEVEL_NODE] = nodes
2109 def CheckPrereq(self):
2110 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2111 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2113 group_nodes = set(self.group_info.members)
2115 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2118 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2120 unlocked_instances = \
2121 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2124 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2125 utils.CommaJoin(unlocked_nodes),
2128 if unlocked_instances:
2129 raise errors.OpPrereqError("Missing lock for instances: %s" %
2130 utils.CommaJoin(unlocked_instances),
2133 self.all_node_info = self.cfg.GetAllNodesInfo()
2134 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2136 self.my_node_names = utils.NiceSort(group_nodes)
2137 self.my_inst_names = utils.NiceSort(group_instances)
2139 self.my_node_info = dict((name, self.all_node_info[name])
2140 for name in self.my_node_names)
2142 self.my_inst_info = dict((name, self.all_inst_info[name])
2143 for name in self.my_inst_names)
2145 # We detect here the nodes that will need the extra RPC calls for verifying
2146 # split LV volumes; they should be locked.
2147 extra_lv_nodes = set()
2149 for inst in self.my_inst_info.values():
2150 if inst.disk_template in constants.DTS_INT_MIRROR:
2151 for nname in inst.all_nodes:
2152 if self.all_node_info[nname].group != self.group_uuid:
2153 extra_lv_nodes.add(nname)
2155 unlocked_lv_nodes = \
2156 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2158 if unlocked_lv_nodes:
2159 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2160 utils.CommaJoin(unlocked_lv_nodes),
2162 self.extra_lv_nodes = list(extra_lv_nodes)
2164 def _VerifyNode(self, ninfo, nresult):
2165 """Perform some basic validation on data returned from a node.
2167 - check the result data structure is well formed and has all the expected fields
2169 - check ganeti version
2171 @type ninfo: L{objects.Node}
2172 @param ninfo: the node to check
2173 @param nresult: the results from the node
2175 @return: whether overall this call was successful (and we can expect
2176 reasonable values in the response)
2180 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2182 # main result, nresult should be a non-empty dict
2183 test = not nresult or not isinstance(nresult, dict)
2184 _ErrorIf(test, constants.CV_ENODERPC, node,
2185 "unable to verify node: no data returned")
2189 # compares ganeti version
2190 local_version = constants.PROTOCOL_VERSION
2191 remote_version = nresult.get("version", None)
2192 test = not (remote_version and
2193 isinstance(remote_version, (list, tuple)) and
2194 len(remote_version) == 2)
2195 _ErrorIf(test, constants.CV_ENODERPC, node,
2196 "connection to node returned invalid data")
2200 test = local_version != remote_version[0]
2201 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2202 "incompatible protocol versions: master %s,"
2203 " node %s", local_version, remote_version[0])
2207 # node seems compatible, we can actually try to look into its results
2209 # full package version
2210 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2211 constants.CV_ENODEVERSION, node,
2212 "software version mismatch: master %s, node %s",
2213 constants.RELEASE_VERSION, remote_version[1],
2214 code=self.ETYPE_WARNING)
2216 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2217 if ninfo.vm_capable and isinstance(hyp_result, dict):
2218 for hv_name, hv_result in hyp_result.iteritems():
2219 test = hv_result is not None
2220 _ErrorIf(test, constants.CV_ENODEHV, node,
2221 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2223 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2224 if ninfo.vm_capable and isinstance(hvp_result, list):
2225 for item, hv_name, hv_result in hvp_result:
2226 _ErrorIf(True, constants.CV_ENODEHV, node,
2227 "hypervisor %s parameter verify failure (source %s): %s",
2228 hv_name, item, hv_result)
2230 test = nresult.get(constants.NV_NODESETUP,
2231 ["Missing NODESETUP results"])
2232 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2237 def _VerifyNodeTime(self, ninfo, nresult,
2238 nvinfo_starttime, nvinfo_endtime):
2239 """Check the node time.
2241 @type ninfo: L{objects.Node}
2242 @param ninfo: the node to check
2243 @param nresult: the remote results for the node
2244 @param nvinfo_starttime: the start time of the RPC call
2245 @param nvinfo_endtime: the end time of the RPC call
2249 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2251 ntime = nresult.get(constants.NV_TIME, None)
2253 ntime_merged = utils.MergeTime(ntime)
2254 except (ValueError, TypeError):
2255 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2258 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2259 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2260 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2261 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2265 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2266 "Node time diverges by at least %s from master node time",
2269 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2270 """Check the node LVM results.
2272 @type ninfo: L{objects.Node}
2273 @param ninfo: the node to check
2274 @param nresult: the remote results for the node
2275 @param vg_name: the configured VG name
2282 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2284 # checks vg existence and size > 20G
2285 vglist = nresult.get(constants.NV_VGLIST, None)
2287 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2289 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2290 constants.MIN_VG_SIZE)
2291 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2294 pvlist = nresult.get(constants.NV_PVLIST, None)
2295 test = pvlist is None
2296 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2298 # check that ':' is not present in PV names, since it's a
2299 # special character for lvcreate (denotes the range of PEs to use on this PV)
2301 for _, pvname, owner_vg in pvlist:
2302 test = ":" in pvname
2303 _ErrorIf(test, constants.CV_ENODELVM, node,
2304 "Invalid character ':' in PV '%s' of VG '%s'",
2307 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2308 """Check the node bridges.
2310 @type ninfo: L{objects.Node}
2311 @param ninfo: the node to check
2312 @param nresult: the remote results for the node
2313 @param bridges: the expected list of bridges
2320 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2322 missing = nresult.get(constants.NV_BRIDGES, None)
2323 test = not isinstance(missing, list)
2324 _ErrorIf(test, constants.CV_ENODENET, node,
2325 "did not return valid bridge information")
2327 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2328 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2330 def _VerifyNodeUserScripts(self, ninfo, nresult):
2331 """Check the results of user scripts presence and executability on the node
2333 @type ninfo: L{objects.Node}
2334 @param ninfo: the node to check
2335 @param nresult: the remote results for the node
2340 test = constants.NV_USERSCRIPTS not in nresult
2341 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2342 "did not return user scripts information")
2344 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2346 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2347 "user scripts not present or not executable: %s" %
2348 utils.CommaJoin(sorted(broken_scripts)))
2350 def _VerifyNodeNetwork(self, ninfo, nresult):
2351 """Check the node network connectivity results.
2353 @type ninfo: L{objects.Node}
2354 @param ninfo: the node to check
2355 @param nresult: the remote results for the node
2359 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2361 test = constants.NV_NODELIST not in nresult
2362 _ErrorIf(test, constants.CV_ENODESSH, node,
2363 "node hasn't returned node ssh connectivity data")
2365 if nresult[constants.NV_NODELIST]:
2366 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2367 _ErrorIf(True, constants.CV_ENODESSH, node,
2368 "ssh communication with node '%s': %s", a_node, a_msg)
2370 test = constants.NV_NODENETTEST not in nresult
2371 _ErrorIf(test, constants.CV_ENODENET, node,
2372 "node hasn't returned node tcp connectivity data")
2374 if nresult[constants.NV_NODENETTEST]:
2375 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2377 _ErrorIf(True, constants.CV_ENODENET, node,
2378 "tcp communication with node '%s': %s",
2379 anode, nresult[constants.NV_NODENETTEST][anode])
2381 test = constants.NV_MASTERIP not in nresult
2382 _ErrorIf(test, constants.CV_ENODENET, node,
2383 "node hasn't returned node master IP reachability data")
2385 if not nresult[constants.NV_MASTERIP]:
2386 if node == self.master_node:
2387 msg = "the master node cannot reach the master IP (not configured?)"
2389 msg = "cannot reach the master IP"
2390 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2392 def _VerifyInstance(self, instance, instanceconfig, node_image,
2394 """Verify an instance.
2396 This function checks to see if the required block devices are
2397 available on the instance's nodes.
2400 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2401 node_current = instanceconfig.primary_node
2403 node_vol_should = {}
2404 instanceconfig.MapLVsByNode(node_vol_should)
2406 cluster = self.cfg.GetClusterInfo()
2407 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2409 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2410 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2412 for node in node_vol_should:
2413 n_img = node_image[node]
2414 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2415 # ignore missing volumes on offline or broken nodes
2417 for volume in node_vol_should[node]:
2418 test = volume not in n_img.volumes
2419 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2420 "volume %s missing on node %s", volume, node)
2422 if instanceconfig.admin_state == constants.ADMINST_UP:
2423 pri_img = node_image[node_current]
2424 test = instance not in pri_img.instances and not pri_img.offline
2425 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2426 "instance not running on its primary node %s",
2429 diskdata = [(nname, success, status, idx)
2430 for (nname, disks) in diskstatus.items()
2431 for idx, (success, status) in enumerate(disks)]
2433 for nname, success, bdev_status, idx in diskdata:
2434 # the 'ghost node' construction in Exec() ensures that we have a
2436 snode = node_image[nname]
2437 bad_snode = snode.ghost or snode.offline
2438 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2439 not success and not bad_snode,
2440 constants.CV_EINSTANCEFAULTYDISK, instance,
2441 "couldn't retrieve status for disk/%s on %s: %s",
2442 idx, nname, bdev_status)
2443 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2444 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2445 constants.CV_EINSTANCEFAULTYDISK, instance,
2446 "disk/%s on %s is faulty", idx, nname)
2448 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2449 """Verify if there are any unknown volumes in the cluster.
2451 The .os, .swap and backup volumes are ignored. All other volumes are
2452 reported as unknown.
2454 @type reserved: L{ganeti.utils.FieldSet}
2455 @param reserved: a FieldSet of reserved volume names
2458 for node, n_img in node_image.items():
2459 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2460 self.all_node_info[node].group != self.group_uuid):
2461 # skip non-healthy nodes
2463 for volume in n_img.volumes:
2464 test = ((node not in node_vol_should or
2465 volume not in node_vol_should[node]) and
2466 not reserved.Matches(volume))
2467 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2468 "volume %s is unknown", volume)
2470 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2471 """Verify N+1 Memory Resilience.
2473 Check that if one single node dies we can still start all the
2474 instances it was primary for.
2477 cluster_info = self.cfg.GetClusterInfo()
2478 for node, n_img in node_image.items():
2479 # This code checks that every node which is now listed as
2480 # secondary has enough memory to host all instances it is
2481 # supposed to, should a single other node in the cluster fail.
2482 # FIXME: not ready for failover to an arbitrary node
2483 # FIXME: does not support file-backed instances
2484 # WARNING: we currently take into account down instances as well
2485 # as up ones, considering that even if they're down someone
2486 # might want to start them even in the event of a node failure.
2487 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2488 # we're skipping nodes marked offline and nodes in other groups from
2489 # the N+1 warning, since most likely we don't have good memory
2490 # information from them; we already list instances living on such
2491 # nodes, and that's enough warning
2493 #TODO(dynmem): also consider ballooning out other instances
2494 for prinode, instances in n_img.sbp.items():
2496 for instance in instances:
2497 bep = cluster_info.FillBE(instance_cfg[instance])
2498 if bep[constants.BE_AUTO_BALANCE]:
2499 needed_mem += bep[constants.BE_MINMEM]
2500 test = n_img.mfree < needed_mem
2501 self._ErrorIf(test, constants.CV_ENODEN1, node,
2502 "not enough memory to accomodate instance failovers"
2503 " should node %s fail (%dMiB needed, %dMiB available)",
2504 prinode, needed_mem, n_img.mfree)
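# Worked example with hypothetical figures (not from the original module): if
# this node is secondary for two auto-balanced instances of primary node P with
# BE_MINMEM values of 512 and 1024 MiB, needed_mem is 1536 MiB; with only
# 1024 MiB reported free (n_img.mfree), the check above reports an ENODEN1
# error naming P as the node whose failure could not be absorbed.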
2507 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2508 (files_all, files_opt, files_mc, files_vm)):
2509 """Verifies file checksums collected from all nodes.
2511 @param errorif: Callback for reporting errors
2512 @param nodeinfo: List of L{objects.Node} objects
2513 @param master_node: Name of master node
2514 @param all_nvinfo: RPC results
2517 # Define functions determining which nodes to consider for a file
2520 (files_mc, lambda node: (node.master_candidate or
2521 node.name == master_node)),
2522 (files_vm, lambda node: node.vm_capable),
2525 # Build mapping from filename to list of nodes which should have the file
2527 for (files, fn) in files2nodefn:
2529 filenodes = nodeinfo
2531 filenodes = filter(fn, nodeinfo)
2532 nodefiles.update((filename,
2533 frozenset(map(operator.attrgetter("name"), filenodes)))
2534 for filename in files)
2536 assert set(nodefiles) == (files_all | files_mc | files_vm)
2538 fileinfo = dict((filename, {}) for filename in nodefiles)
2539 ignore_nodes = set()
2541 for node in nodeinfo:
2543 ignore_nodes.add(node.name)
2546 nresult = all_nvinfo[node.name]
2548 if nresult.fail_msg or not nresult.payload:
2551 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2553 test = not (node_files and isinstance(node_files, dict))
2554 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2555 "Node did not return file checksum data")
2557 ignore_nodes.add(node.name)
2560 # Build per-checksum mapping from filename to nodes having it
2561 for (filename, checksum) in node_files.items():
2562 assert filename in nodefiles
2563 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2565 for (filename, checksums) in fileinfo.items():
2566 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2568 # Nodes having the file
2569 with_file = frozenset(node_name
2570 for nodes in fileinfo[filename].values()
2571 for node_name in nodes) - ignore_nodes
2573 expected_nodes = nodefiles[filename] - ignore_nodes
2575 # Nodes missing file
2576 missing_file = expected_nodes - with_file
2578 if filename in files_opt:
2580 errorif(missing_file and missing_file != expected_nodes,
2581 constants.CV_ECLUSTERFILECHECK, None,
2582 "File %s is optional, but it must exist on all or no"
2583 " nodes (not found on %s)",
2584 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2586 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2587 "File %s is missing from node(s) %s", filename,
2588 utils.CommaJoin(utils.NiceSort(missing_file)))
2590 # Warn if a node has a file it shouldn't
2591 unexpected = with_file - expected_nodes
2593 constants.CV_ECLUSTERFILECHECK, None,
2594 "File %s should not exist on node(s) %s",
2595 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2597 # See if there are multiple versions of the file
2598 test = len(checksums) > 1
2600 variants = ["variant %s on %s" %
2601 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2602 for (idx, (checksum, nodes)) in
2603 enumerate(sorted(checksums.items()))]
2607 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2608 "File %s found with %s different checksums (%s)",
2609 filename, len(checksums), "; ".join(variants))
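# Illustrative sketch (not part of the original module), with hypothetical file
# names and nodes: the two main structures built above are shaped like
#
#   nodefiles = {"/path/to/file": frozenset(["node1", "node2"]), ...}
#   fileinfo  = {"/path/to/file": {"<checksum>": set(["node1"]), ...}, ...}
#
# from which missing files, unexpected files and checksum variants are derived.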
2611 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2613 """Verifies and the node DRBD status.
2615 @type ninfo: L{objects.Node}
2616 @param ninfo: the node to check
2617 @param nresult: the remote results for the node
2618 @param instanceinfo: the dict of instances
2619 @param drbd_helper: the configured DRBD usermode helper
2620 @param drbd_map: the DRBD map as returned by
2621 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2625 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2628 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2629 test = (helper_result is None)
2630 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2631 "no drbd usermode helper returned")
2633 status, payload = helper_result
2635 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2636 "drbd usermode helper check unsuccessful: %s", payload)
2637 test = status and (payload != drbd_helper)
2638 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2639 "wrong drbd usermode helper: %s", payload)
2641 # compute the DRBD minors
2643 for minor, instance in drbd_map[node].items():
2644 test = instance not in instanceinfo
2645 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2646 "ghost instance '%s' in temporary DRBD map", instance)
2647 # ghost instance should not be running, but otherwise we
2648 # don't give double warnings (both ghost instance and
2649 # unallocated minor in use)
2651 node_drbd[minor] = (instance, False)
2653 instance = instanceinfo[instance]
2654 node_drbd[minor] = (instance.name,
2655 instance.admin_state == constants.ADMINST_UP)
2657 # and now check them
2658 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2659 test = not isinstance(used_minors, (tuple, list))
2660 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2661 "cannot parse drbd status file: %s", str(used_minors))
2663 # we cannot check drbd status
2666 for minor, (iname, must_exist) in node_drbd.items():
2667 test = minor not in used_minors and must_exist
2668 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2669 "drbd minor %d of instance %s is not active", minor, iname)
2670 for minor in used_minors:
2671 test = minor not in node_drbd
2672 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2673 "unallocated drbd minor %d is in use", minor)
2675 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2676 """Builds the node OS structures.
2678 @type ninfo: L{objects.Node}
2679 @param ninfo: the node to check
2680 @param nresult: the remote results for the node
2681 @param nimg: the node image object
2685 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2687 remote_os = nresult.get(constants.NV_OSLIST, None)
2688 test = (not isinstance(remote_os, list) or
2689 not compat.all(isinstance(v, list) and len(v) == 7
2690 for v in remote_os))
2692 _ErrorIf(test, constants.CV_ENODEOS, node,
2693 "node hasn't returned valid OS data")
2702 for (name, os_path, status, diagnose,
2703 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2705 if name not in os_dict:
2708 # parameters is a list of lists instead of list of tuples due to
2709 # JSON lacking a real tuple type, fix it:
2710 parameters = [tuple(v) for v in parameters]
2711 os_dict[name].append((os_path, status, diagnose,
2712 set(variants), set(parameters), set(api_ver)))
2714 nimg.oslist = os_dict
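# Illustrative sketch (not part of the original module): for a hypothetical OS
# named "dummy-os", nimg.oslist would end up containing roughly
#   {"dummy-os": [("/path/to/os", True, "", set(["default"]),
#                  set([("a_param", "a_value")]), set([20]))]}
# i.e. one (path, status, diagnose, variants, parameters, api_versions) tuple
# per copy of the OS found on the node.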
2716 def _VerifyNodeOS(self, ninfo, nimg, base):
2717 """Verifies the node OS list.
2719 @type ninfo: L{objects.Node}
2720 @param ninfo: the node to check
2721 @param nimg: the node image object
2722 @param base: the 'template' node we match against (e.g. from the master)
2726 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2728 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2730 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2731 for os_name, os_data in nimg.oslist.items():
2732 assert os_data, "Empty OS status for OS %s?!" % os_name
2733 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2734 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2735 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2736 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2737 "OS '%s' has multiple entries (first one shadows the rest): %s",
2738 os_name, utils.CommaJoin([v[0] for v in os_data]))
2739 # comparisons with the 'base' image
2740 test = os_name not in base.oslist
2741 _ErrorIf(test, constants.CV_ENODEOS, node,
2742 "Extra OS %s not present on reference node (%s)",
2746 assert base.oslist[os_name], "Base node has empty OS status?"
2747 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2749 # base OS is invalid, skipping
2751 for kind, a, b in [("API version", f_api, b_api),
2752 ("variants list", f_var, b_var),
2753 ("parameters", beautify_params(f_param),
2754 beautify_params(b_param))]:
2755 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2756 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2757 kind, os_name, base.name,
2758 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2760 # check any missing OSes
2761 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2762 _ErrorIf(missing, constants.CV_ENODEOS, node,
2763 "OSes present on reference node %s but missing on this node: %s",
2764 base.name, utils.CommaJoin(missing))
2766 def _VerifyOob(self, ninfo, nresult):
2767 """Verifies out of band functionality of a node.
2769 @type ninfo: L{objects.Node}
2770 @param ninfo: the node to check
2771 @param nresult: the remote results for the node
2775 # We just have to verify the paths on master and/or master candidates
2776 # as the oob helper is invoked on the master
2777 if ((ninfo.master_candidate or ninfo.master_capable) and
2778 constants.NV_OOB_PATHS in nresult):
2779 for path_result in nresult[constants.NV_OOB_PATHS]:
2780 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2782 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2783 """Verifies and updates the node volume data.
2785 This function will update a L{NodeImage}'s internal structures
2786 with data from the remote call.
2788 @type ninfo: L{objects.Node}
2789 @param ninfo: the node to check
2790 @param nresult: the remote results for the node
2791 @param nimg: the node image object
2792 @param vg_name: the configured VG name
2796 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2798 nimg.lvm_fail = True
2799 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2802 elif isinstance(lvdata, basestring):
2803 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2804 utils.SafeEncode(lvdata))
2805 elif not isinstance(lvdata, dict):
2806 _ErrorIf(True, constants.CV_ENODELVM, node,
2807 "rpc call to node failed (lvlist)")
2809 nimg.volumes = lvdata
2810 nimg.lvm_fail = False
2812 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2813 """Verifies and updates the node instance list.
2815 If the listing was successful, then updates this node's instance
2816 list. Otherwise, it marks the RPC call as failed for the instance list.
2819 @type ninfo: L{objects.Node}
2820 @param ninfo: the node to check
2821 @param nresult: the remote results for the node
2822 @param nimg: the node image object
2825 idata = nresult.get(constants.NV_INSTANCELIST, None)
2826 test = not isinstance(idata, list)
2827 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2828 "rpc call to node failed (instancelist): %s",
2829 utils.SafeEncode(str(idata)))
2831 nimg.hyp_fail = True
2833 nimg.instances = idata
2835 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2836 """Verifies and computes a node information map
2838 @type ninfo: L{objects.Node}
2839 @param ninfo: the node to check
2840 @param nresult: the remote results for the node
2841 @param nimg: the node image object
2842 @param vg_name: the configured VG name
2846 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2848 # try to read free memory (from the hypervisor)
2849 hv_info = nresult.get(constants.NV_HVINFO, None)
2850 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2851 _ErrorIf(test, constants.CV_ENODEHV, node,
2852 "rpc call to node failed (hvinfo)")
2855 nimg.mfree = int(hv_info["memory_free"])
2856 except (ValueError, TypeError):
2857 _ErrorIf(True, constants.CV_ENODERPC, node,
2858 "node returned invalid nodeinfo, check hypervisor")
2860 # FIXME: devise a free space model for file based instances as well
2861 if vg_name is not None:
2862 test = (constants.NV_VGLIST not in nresult or
2863 vg_name not in nresult[constants.NV_VGLIST])
2864 _ErrorIf(test, constants.CV_ENODELVM, node,
2865 "node didn't return data for the volume group '%s'"
2866 " - it is either missing or broken", vg_name)
2869 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2870 except (ValueError, TypeError):
2871 _ErrorIf(True, constants.CV_ENODERPC, node,
2872 "node returned invalid LVM info, check LVM status")
2874 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2875 """Gets per-disk status information for all instances.
2877 @type nodelist: list of strings
2878 @param nodelist: Node names
2879 @type node_image: dict of (name, L{objects.Node})
2880 @param node_image: Node objects
2881 @type instanceinfo: dict of (name, L{objects.Instance})
2882 @param instanceinfo: Instance objects
2883 @rtype: {instance: {node: [(success, payload)]}}
2884 @return: a dictionary of per-instance dictionaries with nodes as
2885 keys and disk information as values; the disk information is a
2886 list of tuples (success, payload)
2889 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2892 node_disks_devonly = {}
2893 diskless_instances = set()
2894 diskless = constants.DT_DISKLESS
2896 for nname in nodelist:
2897 node_instances = list(itertools.chain(node_image[nname].pinst,
2898 node_image[nname].sinst))
2899 diskless_instances.update(inst for inst in node_instances
2900 if instanceinfo[inst].disk_template == diskless)
2901 disks = [(inst, disk)
2902 for inst in node_instances
2903 for disk in instanceinfo[inst].disks]
2906 # No need to collect data
2909 node_disks[nname] = disks
2911 # _AnnotateDiskParams makes already copies of the disks
2913 for (inst, dev) in disks:
2914 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2915 self.cfg.SetDiskID(anno_disk, nname)
2916 devonly.append(anno_disk)
2918 node_disks_devonly[nname] = devonly
2920 assert len(node_disks) == len(node_disks_devonly)
2922 # Collect data from all nodes with disks
2923 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2926 assert len(result) == len(node_disks)
2930 for (nname, nres) in result.items():
2931 disks = node_disks[nname]
2934 # No data from this node
2935 data = len(disks) * [(False, "node offline")]
2938 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2939 "while getting disk information: %s", msg)
2941 # No data from this node
2942 data = len(disks) * [(False, msg)]
2945 for idx, i in enumerate(nres.payload):
2946 if isinstance(i, (tuple, list)) and len(i) == 2:
2949 logging.warning("Invalid result from node %s, entry %d: %s",
2951 data.append((False, "Invalid result from the remote node"))
2953 for ((inst, _), status) in zip(disks, data):
2954 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2956 # Add empty entries for diskless instances.
2957 for inst in diskless_instances:
2958 assert inst not in instdisk
2961 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2962 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2963 compat.all(isinstance(s, (tuple, list)) and
2964 len(s) == 2 for s in statuses)
2965 for inst, nnames in instdisk.items()
2966 for nname, statuses in nnames.items())
2967 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
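# Illustrative sketch (not part of the original module), with hypothetical names:
# for a mirrored instance "inst1" on "node1"/"node2" and a diskless "inst2",
# the collected instdisk structure looks roughly like
#   {"inst1": {"node1": [(True, status)], "node2": [(True, status)]},
#    "inst2": {}}
# where each list holds one (success, payload) pair per disk of the instance.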
2972 def _SshNodeSelector(group_uuid, all_nodes):
2973 """Create endless iterators for all potential SSH check hosts.
2976 nodes = [node for node in all_nodes
2977 if (node.group != group_uuid and
2979 keyfunc = operator.attrgetter("group")
2981 return map(itertools.cycle,
2982 [sorted(map(operator.attrgetter("name"), names))
2983 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2987 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2988 """Choose which nodes should talk to which other nodes.
2990 We will make nodes contact all nodes in their group, and one node from every other group.
2993 @warning: This algorithm has a known issue if one node group is much
2994 smaller than others (e.g. just one node). In such a case all other
2995 nodes will talk to the single node.
2998 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2999 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3001 return (online_nodes,
3002 dict((name, sorted([i.next() for i in sel]))
3003 for name in online_nodes))
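# Illustrative sketch (not part of the original module), with hypothetical nodes:
# when verifying a group holding node1/node2 while another group holds node3,
# the method would return roughly
#   (["node1", "node2"], {"node1": ["node3"], "node2": ["node3"]})
# i.e. the group's online nodes plus, for each of them, one contact node taken
# from every other group (cycled across callers to spread the load).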
3005 def BuildHooksEnv(self):
3008 Cluster-Verify hooks are run only in the post phase; if they fail, their
3009 output is logged in the verify output and the verification fails.
3013 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3016 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3017 for node in self.my_node_info.values())
3021 def BuildHooksNodes(self):
3022 """Build hooks nodes.
3025 return ([], self.my_node_names)
3027 def Exec(self, feedback_fn):
3028 """Verify integrity of the node group, performing various test on nodes.
3031 # This method has too many local variables. pylint: disable=R0914
3032 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3034 if not self.my_node_names:
3036 feedback_fn("* Empty node group, skipping verification")
3040 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3041 verbose = self.op.verbose
3042 self._feedback_fn = feedback_fn
3044 vg_name = self.cfg.GetVGName()
3045 drbd_helper = self.cfg.GetDRBDHelper()
3046 cluster = self.cfg.GetClusterInfo()
3047 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3048 hypervisors = cluster.enabled_hypervisors
3049 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3051 i_non_redundant = [] # Non redundant instances
3052 i_non_a_balanced = [] # Non auto-balanced instances
3053 i_offline = 0 # Count of offline instances
3054 n_offline = 0 # Count of offline nodes
3055 n_drained = 0 # Count of nodes being drained
3056 node_vol_should = {}
3058 # FIXME: verify OS list
3061 filemap = _ComputeAncillaryFiles(cluster, False)
3063 # do local checksums
3064 master_node = self.master_node = self.cfg.GetMasterNode()
3065 master_ip = self.cfg.GetMasterIP()
3067 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3070 if self.cfg.GetUseExternalMipScript():
3071 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3073 node_verify_param = {
3074 constants.NV_FILELIST:
3075 utils.UniqueSequence(filename
3076 for files in filemap
3077 for filename in files),
3078 constants.NV_NODELIST:
3079 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3080 self.all_node_info.values()),
3081 constants.NV_HYPERVISOR: hypervisors,
3082 constants.NV_HVPARAMS:
3083 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3084 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3085 for node in node_data_list
3086 if not node.offline],
3087 constants.NV_INSTANCELIST: hypervisors,
3088 constants.NV_VERSION: None,
3089 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3090 constants.NV_NODESETUP: None,
3091 constants.NV_TIME: None,
3092 constants.NV_MASTERIP: (master_node, master_ip),
3093 constants.NV_OSLIST: None,
3094 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3095 constants.NV_USERSCRIPTS: user_scripts,
3098 if vg_name is not None:
3099 node_verify_param[constants.NV_VGLIST] = None
3100 node_verify_param[constants.NV_LVLIST] = vg_name
3101 node_verify_param[constants.NV_PVLIST] = [vg_name]
3102 node_verify_param[constants.NV_DRBDLIST] = None
3105 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3108 # FIXME: this needs to be changed per node-group, not cluster-wide
3110 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3111 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3112 bridges.add(default_nicpp[constants.NIC_LINK])
3113 for instance in self.my_inst_info.values():
3114 for nic in instance.nics:
3115 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3116 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3117 bridges.add(full_nic[constants.NIC_LINK])
3120 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3122 # Build our expected cluster state
3123 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3125 vm_capable=node.vm_capable))
3126 for node in node_data_list)
3130 for node in self.all_node_info.values():
3131 path = _SupportsOob(self.cfg, node)
3132 if path and path not in oob_paths:
3133 oob_paths.append(path)
3136 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3138 for instance in self.my_inst_names:
3139 inst_config = self.my_inst_info[instance]
3140 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3143 for nname in inst_config.all_nodes:
3144 if nname not in node_image:
3145 gnode = self.NodeImage(name=nname)
3146 gnode.ghost = (nname not in self.all_node_info)
3147 node_image[nname] = gnode
3149 inst_config.MapLVsByNode(node_vol_should)
3151 pnode = inst_config.primary_node
3152 node_image[pnode].pinst.append(instance)
3154 for snode in inst_config.secondary_nodes:
3155 nimg = node_image[snode]
3156 nimg.sinst.append(instance)
3157 if pnode not in nimg.sbp:
3158 nimg.sbp[pnode] = []
3159 nimg.sbp[pnode].append(instance)
3161 # At this point, we have the in-memory data structures complete,
3162 # except for the runtime information, which we'll gather next
3164 # Due to the way our RPC system works, exact response times cannot be
3165 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3166 # time before and after executing the request, we can at least have a time window.
3168 nvinfo_starttime = time.time()
3169 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3171 self.cfg.GetClusterName())
3172 nvinfo_endtime = time.time()
3174 if self.extra_lv_nodes and vg_name is not None:
3176 self.rpc.call_node_verify(self.extra_lv_nodes,
3177 {constants.NV_LVLIST: vg_name},
3178 self.cfg.GetClusterName())
3180 extra_lv_nvinfo = {}
3182 all_drbd_map = self.cfg.ComputeDRBDMap()
3184 feedback_fn("* Gathering disk information (%s nodes)" %
3185 len(self.my_node_names))
3186 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3189 feedback_fn("* Verifying configuration file consistency")
3191 # If not all nodes are being checked, we need to make sure the master node
3192 # and a non-checked vm_capable node are in the list.
3193 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3195 vf_nvinfo = all_nvinfo.copy()
3196 vf_node_info = list(self.my_node_info.values())
3197 additional_nodes = []
3198 if master_node not in self.my_node_info:
3199 additional_nodes.append(master_node)
3200 vf_node_info.append(self.all_node_info[master_node])
3201 # Add the first vm_capable node we find which is not included,
3202 # excluding the master node (which we already have)
3203 for node in absent_nodes:
3204 nodeinfo = self.all_node_info[node]
3205 if (nodeinfo.vm_capable and not nodeinfo.offline and
3206 node != master_node):
3207 additional_nodes.append(node)
3208 vf_node_info.append(self.all_node_info[node])
3210 key = constants.NV_FILELIST
3211 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3212 {key: node_verify_param[key]},
3213 self.cfg.GetClusterName()))
3215 vf_nvinfo = all_nvinfo
3216 vf_node_info = self.my_node_info.values()
3218 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3220 feedback_fn("* Verifying node status")
3224 for node_i in node_data_list:
3226 nimg = node_image[node]
3230 feedback_fn("* Skipping offline node %s" % (node,))
3234 if node == master_node:
3236 elif node_i.master_candidate:
3237 ntype = "master candidate"
3238 elif node_i.drained:
3244 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3246 msg = all_nvinfo[node].fail_msg
3247 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3250 nimg.rpc_fail = True
3253 nresult = all_nvinfo[node].payload
3255 nimg.call_ok = self._VerifyNode(node_i, nresult)
3256 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3257 self._VerifyNodeNetwork(node_i, nresult)
3258 self._VerifyNodeUserScripts(node_i, nresult)
3259 self._VerifyOob(node_i, nresult)
3262 self._VerifyNodeLVM(node_i, nresult, vg_name)
3263 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3266 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3267 self._UpdateNodeInstances(node_i, nresult, nimg)
3268 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3269 self._UpdateNodeOS(node_i, nresult, nimg)
3271 if not nimg.os_fail:
3272 if refos_img is None:
3274 self._VerifyNodeOS(node_i, nimg, refos_img)
3275 self._VerifyNodeBridges(node_i, nresult, bridges)
3277 # Check whether all running instances are primary for the node. (This
3278 # can no longer be done from _VerifyInstance below, since some of the
3279 # wrong instances could be from other node groups.)
3280 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3282 for inst in non_primary_inst:
3283 test = inst in self.all_inst_info
3284 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3285 "instance should not run on node %s", node_i.name)
3286 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3287 "node is running unknown instance %s", inst)
3289 for node, result in extra_lv_nvinfo.items():
3290 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3291 node_image[node], vg_name)
3293 feedback_fn("* Verifying instance status")
3294 for instance in self.my_inst_names:
3296 feedback_fn("* Verifying instance %s" % instance)
3297 inst_config = self.my_inst_info[instance]
3298 self._VerifyInstance(instance, inst_config, node_image,
3300 inst_nodes_offline = []
3302 pnode = inst_config.primary_node
3303 pnode_img = node_image[pnode]
3304 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3305 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3306 " primary node failed", instance)
3308 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3310 constants.CV_EINSTANCEBADNODE, instance,
3311 "instance is marked as running and lives on offline node %s",
3312 inst_config.primary_node)
3314 # If the instance is non-redundant we cannot survive losing its primary
3315 # node, so we are not N+1 compliant. On the other hand we have no disk
3316 # templates with more than one secondary so that situation is not well supported either.
3318 # FIXME: does not support file-backed instances
3319 if not inst_config.secondary_nodes:
3320 i_non_redundant.append(instance)
3322 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3323 constants.CV_EINSTANCELAYOUT,
3324 instance, "instance has multiple secondary nodes: %s",
3325 utils.CommaJoin(inst_config.secondary_nodes),
3326 code=self.ETYPE_WARNING)
3328 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3329 pnode = inst_config.primary_node
3330 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3331 instance_groups = {}
3333 for node in instance_nodes:
3334 instance_groups.setdefault(self.all_node_info[node].group,
3338 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3339 # Sort so that we always list the primary node first.
3340 for group, nodes in sorted(instance_groups.items(),
3341 key=lambda (_, nodes): pnode in nodes,
3344 self._ErrorIf(len(instance_groups) > 1,
3345 constants.CV_EINSTANCESPLITGROUPS,
3346 instance, "instance has primary and secondary nodes in"
3347 " different groups: %s", utils.CommaJoin(pretty_list),
3348 code=self.ETYPE_WARNING)
3350 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3351 i_non_a_balanced.append(instance)
3353 for snode in inst_config.secondary_nodes:
3354 s_img = node_image[snode]
3355 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3356 snode, "instance %s, connection to secondary node failed",
3360 inst_nodes_offline.append(snode)
3362 # warn that the instance lives on offline nodes
3363 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3364 "instance has offline secondary node(s) %s",
3365 utils.CommaJoin(inst_nodes_offline))
3366 # ... or ghost/non-vm_capable nodes
3367 for node in inst_config.all_nodes:
3368 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3369 instance, "instance lives on ghost node %s", node)
3370 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3371 instance, "instance lives on non-vm_capable node %s", node)
3373 feedback_fn("* Verifying orphan volumes")
3374 reserved = utils.FieldSet(*cluster.reserved_lvs)
3376 # We will get spurious "unknown volume" warnings if any node of this group
3377 # is secondary for an instance whose primary is in another group. To avoid
3378 # them, we find these instances and add their volumes to node_vol_should.
3379 for inst in self.all_inst_info.values():
3380 for secondary in inst.secondary_nodes:
3381 if (secondary in self.my_node_info
3382 and inst.name not in self.my_inst_info):
3383 inst.MapLVsByNode(node_vol_should)
3386 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3388 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3389 feedback_fn("* Verifying N+1 Memory redundancy")
3390 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3392 feedback_fn("* Other Notes")
3394 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3395 % len(i_non_redundant))
3397 if i_non_a_balanced:
3398 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3399 % len(i_non_a_balanced))
3402 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3405 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3408 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3412 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3413 """Analyze the post-hooks' result
3415 This method analyzes the hook result, handles it, and sends some
3416 nicely-formatted feedback back to the user.
3418 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3419 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3420 @param hooks_results: the results of the multi-node hooks rpc call
3421 @param feedback_fn: function used to send feedback back to the caller
3422 @param lu_result: previous Exec result
3423 @return: the new Exec result, based on the previous result
3427 # We only really run POST phase hooks, only for non-empty groups,
3428 # and are only interested in their results
3429 if not self.my_node_names:
3432 elif phase == constants.HOOKS_PHASE_POST:
3433 # Used to change hooks' output to proper indentation
3434 feedback_fn("* Hooks Results")
3435 assert hooks_results, "invalid result from hooks"
3437 for node_name in hooks_results:
3438 res = hooks_results[node_name]
3440 test = msg and not res.offline
3441 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3442 "Communication failure in hooks execution: %s", msg)
3443 if res.offline or msg:
3444 # No need to investigate payload if node is offline or gave an error
3447 for script, hkr, output in res.payload:
3448 test = hkr == constants.HKR_FAIL
3449 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3450 "Script %s failed, output:", script)
3452 output = self._HOOKS_INDENT_RE.sub(" ", output)
3453 feedback_fn("%s" % output)
3459 class LUClusterVerifyDisks(NoHooksLU):
3460 """Verifies the cluster disks status.
3465 def ExpandNames(self):
3466 self.share_locks = _ShareAll()
3467 self.needed_locks = {
3468 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3471 def Exec(self, feedback_fn):
3472 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3474 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3475 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3476 for group in group_names])
3479 class LUGroupVerifyDisks(NoHooksLU):
3480 """Verifies the status of all disks in a node group.
3485 def ExpandNames(self):
3486 # Raises errors.OpPrereqError on its own if group can't be found
3487 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3489 self.share_locks = _ShareAll()
3490 self.needed_locks = {
3491 locking.LEVEL_INSTANCE: [],
3492 locking.LEVEL_NODEGROUP: [],
3493 locking.LEVEL_NODE: [],
3496 def DeclareLocks(self, level):
3497 if level == locking.LEVEL_INSTANCE:
3498 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3500 # Lock instances optimistically, needs verification once node and group
3501 # locks have been acquired
3502 self.needed_locks[locking.LEVEL_INSTANCE] = \
3503 self.cfg.GetNodeGroupInstances(self.group_uuid)
3505 elif level == locking.LEVEL_NODEGROUP:
3506 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3508 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3509 set([self.group_uuid] +
3510 # Lock all groups used by instances optimistically; this requires
3511 # going via the node before it's locked, requiring verification
3514 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3515 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3517 elif level == locking.LEVEL_NODE:
3518 # This will only lock the nodes in the group to be verified which contain actual instances
3520 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3521 self._LockInstancesNodes()
3523 # Lock all nodes in group to be verified
3524 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3525 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3526 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3528 def CheckPrereq(self):
3529 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3530 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3531 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3533 assert self.group_uuid in owned_groups
3535 # Check if locked instances are still correct
3536 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3538 # Get instance information
3539 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3541 # Check if node groups for locked instances are still correct
3542 _CheckInstancesNodeGroups(self.cfg, self.instances,
3543 owned_groups, owned_nodes, self.group_uuid)
3545 def Exec(self, feedback_fn):
3546 """Verify integrity of cluster disks.
3548 @rtype: tuple of three items
3549 @return: a tuple of (dict of node-to-node_error, list of instances
3550 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3555 res_instances = set()
3558 nv_dict = _MapInstanceDisksToNodes(
3559 [inst for inst in self.instances.values()
3560 if inst.admin_state == constants.ADMINST_UP])
3563 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3564 set(self.cfg.GetVmCapableNodeList()))
3566 node_lvs = self.rpc.call_lv_list(nodes, [])
3568 for (node, node_res) in node_lvs.items():
3569 if node_res.offline:
3572 msg = node_res.fail_msg
3574 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3575 res_nodes[node] = msg
3578 for lv_name, (_, _, lv_online) in node_res.payload.items():
3579 inst = nv_dict.pop((node, lv_name), None)
3580 if not (lv_online or inst is None):
3581 res_instances.add(inst)
3583 # any leftover items in nv_dict are missing LVs, let's arrange the data
3585 for key, inst in nv_dict.iteritems():
3586 res_missing.setdefault(inst, []).append(list(key))
3588 return (res_nodes, list(res_instances), res_missing)
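# Illustrative sketch (not part of the original module), with hypothetical names:
# the value returned above would look roughly like
#   ({"node3": "Error while listing LVs"},        # per-node enumeration errors
#    ["inst1"],                                   # instances needing activate-disks
#    {"inst2": [["node1", "xenvg/disk0_data"]]})  # missing (node, volume) pairs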
3591 class LUClusterRepairDiskSizes(NoHooksLU):
3592 """Verifies the cluster disks sizes.
3597 def ExpandNames(self):
3598 if self.op.instances:
3599 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3600 self.needed_locks = {
3601 locking.LEVEL_NODE_RES: [],
3602 locking.LEVEL_INSTANCE: self.wanted_names,
3604 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3606 self.wanted_names = None
3607 self.needed_locks = {
3608 locking.LEVEL_NODE_RES: locking.ALL_SET,
3609 locking.LEVEL_INSTANCE: locking.ALL_SET,
3611 self.share_locks = {
3612 locking.LEVEL_NODE_RES: 1,
3613 locking.LEVEL_INSTANCE: 0,
3616 def DeclareLocks(self, level):
3617 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3618 self._LockInstancesNodes(primary_only=True, level=level)
3620 def CheckPrereq(self):
3621 """Check prerequisites.
3623 This only checks the optional instance list against the existing names.
3626 if self.wanted_names is None:
3627 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3629 self.wanted_instances = \
3630 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3632 def _EnsureChildSizes(self, disk):
3633 """Ensure children of the disk have the needed disk size.
3635 This is valid mainly for DRBD8 and fixes an issue where the
3636 children have smaller disk size.
3638 @param disk: an L{ganeti.objects.Disk} object
3641 if disk.dev_type == constants.LD_DRBD8:
3642 assert disk.children, "Empty children for DRBD8?"
3643 fchild = disk.children[0]
3644 mismatch = fchild.size < disk.size
3646 self.LogInfo("Child disk has size %d, parent %d, fixing",
3647 fchild.size, disk.size)
3648 fchild.size = disk.size
3650 # and we recurse on this child only, not on the metadev
3651 return self._EnsureChildSizes(fchild) or mismatch
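# Illustrative sketch (not part of the original module): for a hypothetical DRBD8
# disk of 10240 MiB whose data child (children[0]) is recorded with 10236 MiB,
# the method grows the child's recorded size to 10240 MiB and returns True, so
# the caller in Exec() knows the instance configuration must be written back.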
3655 def Exec(self, feedback_fn):
3656 """Verify the size of cluster disks.
3659 # TODO: check child disks too
3660 # TODO: check differences in size between primary/secondary nodes
3662 for instance in self.wanted_instances:
3663 pnode = instance.primary_node
3664 if pnode not in per_node_disks:
3665 per_node_disks[pnode] = []
3666 for idx, disk in enumerate(instance.disks):
3667 per_node_disks[pnode].append((instance, idx, disk))
3669 assert not (frozenset(per_node_disks.keys()) -
3670 self.owned_locks(locking.LEVEL_NODE_RES)), \
3671 "Not owning correct locks"
3672 assert not self.owned_locks(locking.LEVEL_NODE)
3675 for node, dskl in per_node_disks.items():
3676 newl = [v[2].Copy() for v in dskl]
3678 self.cfg.SetDiskID(dsk, node)
3679 result = self.rpc.call_blockdev_getsize(node, newl)
3681 self.LogWarning("Failure in blockdev_getsize call to node"
3682 " %s, ignoring", node)
3684 if len(result.payload) != len(dskl):
3685 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3686 " result.payload=%s", node, len(dskl), result.payload)
3687 self.LogWarning("Invalid result from node %s, ignoring node results",
3690 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3692 self.LogWarning("Disk %d of instance %s did not return size"
3693 " information, ignoring", idx, instance.name)
3695 if not isinstance(size, (int, long)):
3696 self.LogWarning("Disk %d of instance %s did not return valid"
3697 " size information, ignoring", idx, instance.name)
3700 if size != disk.size:
3701 self.LogInfo("Disk %d of instance %s has mismatched size,"
3702 " correcting: recorded %d, actual %d", idx,
3703 instance.name, disk.size, size)
3705 self.cfg.Update(instance, feedback_fn)
3706 changed.append((instance.name, idx, size))
3707 if self._EnsureChildSizes(disk):
3708 self.cfg.Update(instance, feedback_fn)
3709 changed.append((instance.name, idx, disk.size))
3713 class LUClusterRename(LogicalUnit):
3714 """Rename the cluster.
3717 HPATH = "cluster-rename"
3718 HTYPE = constants.HTYPE_CLUSTER
3720 def BuildHooksEnv(self):
3725 "OP_TARGET": self.cfg.GetClusterName(),
3726 "NEW_NAME": self.op.name,
3729 def BuildHooksNodes(self):
3730 """Build hooks nodes.
3733 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3735 def CheckPrereq(self):
3736 """Verify that the passed name is a valid one.
3739 hostname = netutils.GetHostname(name=self.op.name,
3740 family=self.cfg.GetPrimaryIPFamily())
3742 new_name = hostname.name
3743 self.ip = new_ip = hostname.ip
3744 old_name = self.cfg.GetClusterName()
3745 old_ip = self.cfg.GetMasterIP()
3746 if new_name == old_name and new_ip == old_ip:
3747 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3748 " cluster has changed",
3750 if new_ip != old_ip:
3751 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3752 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3753 " reachable on the network" %
3754 new_ip, errors.ECODE_NOTUNIQUE)
3756 self.op.name = new_name
3758 def Exec(self, feedback_fn):
3759 """Rename the cluster.
3762 clustername = self.op.name
3765 # shutdown the master IP
3766 master_params = self.cfg.GetMasterNetworkParameters()
3767 ems = self.cfg.GetUseExternalMipScript()
3768 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3770 result.Raise("Could not disable the master role")
3773 cluster = self.cfg.GetClusterInfo()
3774 cluster.cluster_name = clustername
3775 cluster.master_ip = new_ip
3776 self.cfg.Update(cluster, feedback_fn)
3778 # update the known hosts file
3779 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3780 node_list = self.cfg.GetOnlineNodeList()
3782 node_list.remove(master_params.name)
3785 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3787 master_params.ip = new_ip
3788 result = self.rpc.call_node_activate_master_ip(master_params.name,
3790 msg = result.fail_msg
3792 self.LogWarning("Could not re-enable the master role on"
3793 " the master, please restart manually: %s", msg)
3798 def _ValidateNetmask(cfg, netmask):
3799 """Checks if a netmask is valid.
3801 @type cfg: L{config.ConfigWriter}
3802 @param cfg: The cluster configuration
3804 @param netmask: the netmask to be verified
3805 @raise errors.OpPrereqError: if the validation fails
3808 ip_family = cfg.GetPrimaryIPFamily()
3810 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3811 except errors.ProgrammerError:
3812 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3813 ip_family, errors.ECODE_INVAL)
3814 if not ipcls.ValidateNetmask(netmask):
3815 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3816 (netmask), errors.ECODE_INVAL)
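# A minimal usage sketch for _ValidateNetmask (illustrative only; the helper
# below is hypothetical and not referenced anywhere else in this module).
# The netmask argument is a CIDR prefix length, checked against the
# cluster's primary IP family and rejected with an OpPrereqError when it is
# out of range for that family.
def _ExampleNetmaskCheck(cfg, netmask):
  """Illustrative wrapper returning (valid, error) instead of raising.

  """
  try:
    _ValidateNetmask(cfg, netmask)
  except errors.OpPrereqError, err:
    # e.g. a prefix length of 33 on an IPv4 cluster ends up here
    return (False, str(err))
  return (True, None)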
3819 class LUClusterSetParams(LogicalUnit):
3820 """Change the parameters of the cluster.
3823 HPATH = "cluster-modify"
3824 HTYPE = constants.HTYPE_CLUSTER
3827 def CheckArguments(self):
3831 if self.op.uid_pool:
3832 uidpool.CheckUidPool(self.op.uid_pool)
3834 if self.op.add_uids:
3835 uidpool.CheckUidPool(self.op.add_uids)
3837 if self.op.remove_uids:
3838 uidpool.CheckUidPool(self.op.remove_uids)
3840 if self.op.master_netmask is not None:
3841 _ValidateNetmask(self.cfg, self.op.master_netmask)
3843 if self.op.diskparams:
3844 for dt_params in self.op.diskparams.values():
3845 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3847 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3848 except errors.OpPrereqError, err:
3849 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3852 def ExpandNames(self):
3853 # FIXME: in the future maybe other cluster params won't require checking on
3854 # all nodes to be modified.
3855 self.needed_locks = {
3856 locking.LEVEL_NODE: locking.ALL_SET,
3857 locking.LEVEL_INSTANCE: locking.ALL_SET,
3858 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3860 self.share_locks = {
3861 locking.LEVEL_NODE: 1,
3862 locking.LEVEL_INSTANCE: 1,
3863 locking.LEVEL_NODEGROUP: 1,
3866 def BuildHooksEnv(self):
3871 "OP_TARGET": self.cfg.GetClusterName(),
3872 "NEW_VG_NAME": self.op.vg_name,
3875 def BuildHooksNodes(self):
3876 """Build hooks nodes.
3879 mn = self.cfg.GetMasterNode()
3882 def CheckPrereq(self):
3883 """Check prerequisites.
3885 This checks whether the given params don't conflict and
3886 whether the given volume group is valid.
3889 if self.op.vg_name is not None and not self.op.vg_name:
3890 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3891 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3892 " instances exist", errors.ECODE_INVAL)
3894 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3895 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3896 raise errors.OpPrereqError("Cannot disable drbd helper while"
3897 " drbd-based instances exist",
3900 node_list = self.owned_locks(locking.LEVEL_NODE)
3902 # if vg_name not None, checks given volume group on all nodes
3904 vglist = self.rpc.call_vg_list(node_list)
3905 for node in node_list:
3906 msg = vglist[node].fail_msg
3908 # ignoring down node
3909 self.LogWarning("Error while gathering data on node %s"
3910 " (ignoring node): %s", node, msg)
3912 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3914 constants.MIN_VG_SIZE)
3916 raise errors.OpPrereqError("Error on node '%s': %s" %
3917 (node, vgstatus), errors.ECODE_ENVIRON)
3919 if self.op.drbd_helper:
3920 # checks given drbd helper on all nodes
3921 helpers = self.rpc.call_drbd_helper(node_list)
3922 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3924 self.LogInfo("Not checking drbd helper on offline node %s", node)
3926 msg = helpers[node].fail_msg
3928 raise errors.OpPrereqError("Error checking drbd helper on node"
3929 " '%s': %s" % (node, msg),
3930 errors.ECODE_ENVIRON)
3931 node_helper = helpers[node].payload
3932 if node_helper != self.op.drbd_helper:
3933 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3934 (node, node_helper), errors.ECODE_ENVIRON)
3936 self.cluster = cluster = self.cfg.GetClusterInfo()
3937 # validate params changes
3938 if self.op.beparams:
3939 objects.UpgradeBeParams(self.op.beparams)
3940 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3941 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3943 if self.op.ndparams:
3944 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3945 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3947 # TODO: we need a more general way to handle resetting
3948 # cluster-level parameters to default values
3949 if self.new_ndparams["oob_program"] == "":
3950 self.new_ndparams["oob_program"] = \
3951 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3953 if self.op.hv_state:
3954 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3955 self.cluster.hv_state_static)
3956 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3957 for hv, values in new_hv_state.items())
3959 if self.op.disk_state:
3960 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3961 self.cluster.disk_state_static)
3962 self.new_disk_state = \
3963 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3964 for name, values in svalues.items()))
3965 for storage, svalues in new_disk_state.items())
3968 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3971 all_instances = self.cfg.GetAllInstancesInfo().values()
3973 for group in self.cfg.GetAllNodeGroupsInfo().values():
3974 instances = frozenset([inst for inst in all_instances
3975 if compat.any(node in group.members
3976 for node in inst.all_nodes)])
3977 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3978 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
3979 new = _ComputeNewInstanceViolations(ipol,
3980 new_ipolicy, instances)
3982 violations.update(new)
3985 self.LogWarning("After the ipolicy change the following instances"
3986 " violate them: %s",
3987 utils.CommaJoin(utils.NiceSort(violations)))
3989 if self.op.nicparams:
3990 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3991 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3992 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3995 # check all instances for consistency
3996 for instance in self.cfg.GetAllInstancesInfo().values():
3997 for nic_idx, nic in enumerate(instance.nics):
3998 params_copy = copy.deepcopy(nic.nicparams)
3999 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4001 # check parameter syntax
4003 objects.NIC.CheckParameterSyntax(params_filled)
4004 except errors.ConfigurationError, err:
4005 nic_errors.append("Instance %s, nic/%d: %s" %
4006 (instance.name, nic_idx, err))
4008 # if we're moving instances to routed, check that they have an ip
4009 target_mode = params_filled[constants.NIC_MODE]
4010 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4011 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4012 " address" % (instance.name, nic_idx))
4014 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4015 "\n".join(nic_errors), errors.ECODE_INVAL)
4017 # hypervisor list/parameters
4018 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4019 if self.op.hvparams:
4020 for hv_name, hv_dict in self.op.hvparams.items():
4021 if hv_name not in self.new_hvparams:
4022 self.new_hvparams[hv_name] = hv_dict
4024 self.new_hvparams[hv_name].update(hv_dict)
4026 # disk template parameters
4027 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4028 if self.op.diskparams:
4029 for dt_name, dt_params in self.op.diskparams.items():
4030 if dt_name not in self.new_diskparams:
4031 self.new_diskparams[dt_name] = dt_params
4033 self.new_diskparams[dt_name].update(dt_params)
4035 # os hypervisor parameters
4036 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4038 for os_name, hvs in self.op.os_hvp.items():
4039 if os_name not in self.new_os_hvp:
4040 self.new_os_hvp[os_name] = hvs
4042 for hv_name, hv_dict in hvs.items():
4043 if hv_name not in self.new_os_hvp[os_name]:
4044 self.new_os_hvp[os_name][hv_name] = hv_dict
4046 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4049 self.new_osp = objects.FillDict(cluster.osparams, {})
4050 if self.op.osparams:
4051 for os_name, osp in self.op.osparams.items():
4052 if os_name not in self.new_osp:
4053 self.new_osp[os_name] = {}
4055 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4058 if not self.new_osp[os_name]:
4059 # we removed all parameters
4060 del self.new_osp[os_name]
4062 # check the parameter validity (remote check)
4063 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4064 os_name, self.new_osp[os_name])
4066 # changes to the hypervisor list
4067 if self.op.enabled_hypervisors is not None:
4068 self.hv_list = self.op.enabled_hypervisors
4069 for hv in self.hv_list:
4070 # if the hypervisor doesn't already exist in the cluster
4071 # hvparams, we initialize it to empty, and then (in both
4072 # cases) we make sure to fill the defaults, as we might not
4073 # have a complete defaults list if the hypervisor wasn't
4075 if hv not in new_hvp:
4077 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4078 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4080 self.hv_list = cluster.enabled_hypervisors
4082 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4083 # either the enabled list has changed, or the parameters have, validate
4084 for hv_name, hv_params in self.new_hvparams.items():
4085 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4086 (self.op.enabled_hypervisors and
4087 hv_name in self.op.enabled_hypervisors)):
4088 # either this is a new hypervisor, or its parameters have changed
4089 hv_class = hypervisor.GetHypervisor(hv_name)
4090 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4091 hv_class.CheckParameterSyntax(hv_params)
4092 _CheckHVParams(self, node_list, hv_name, hv_params)
4095 # no need to check any newly-enabled hypervisors, since the
4096 # defaults have already been checked in the above code-block
4097 for os_name, os_hvp in self.new_os_hvp.items():
4098 for hv_name, hv_params in os_hvp.items():
4099 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4100 # we need to fill in the new os_hvp on top of the actual hv_p
4101 cluster_defaults = self.new_hvparams.get(hv_name, {})
4102 new_osp = objects.FillDict(cluster_defaults, hv_params)
4103 hv_class = hypervisor.GetHypervisor(hv_name)
4104 hv_class.CheckParameterSyntax(new_osp)
4105 _CheckHVParams(self, node_list, hv_name, new_osp)
4107 if self.op.default_iallocator:
4108 alloc_script = utils.FindFile(self.op.default_iallocator,
4109 constants.IALLOCATOR_SEARCH_PATH,
4111 if alloc_script is None:
4112 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4113 " specified" % self.op.default_iallocator,
4116 def Exec(self, feedback_fn):
4117 """Change the parameters of the cluster.
4120 if self.op.vg_name is not None:
4121 new_volume = self.op.vg_name
4124 if new_volume != self.cfg.GetVGName():
4125 self.cfg.SetVGName(new_volume)
4127 feedback_fn("Cluster LVM configuration already in desired"
4128 " state, not changing")
4129 if self.op.drbd_helper is not None:
4130 new_helper = self.op.drbd_helper
4133 if new_helper != self.cfg.GetDRBDHelper():
4134 self.cfg.SetDRBDHelper(new_helper)
4136 feedback_fn("Cluster DRBD helper already in desired state,"
4138 if self.op.hvparams:
4139 self.cluster.hvparams = self.new_hvparams
4141 self.cluster.os_hvp = self.new_os_hvp
4142 if self.op.enabled_hypervisors is not None:
4143 self.cluster.hvparams = self.new_hvparams
4144 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4145 if self.op.beparams:
4146 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4147 if self.op.nicparams:
4148 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4150 self.cluster.ipolicy = self.new_ipolicy
4151 if self.op.osparams:
4152 self.cluster.osparams = self.new_osp
4153 if self.op.ndparams:
4154 self.cluster.ndparams = self.new_ndparams
4155 if self.op.diskparams:
4156 self.cluster.diskparams = self.new_diskparams
4157 if self.op.hv_state:
4158 self.cluster.hv_state_static = self.new_hv_state
4159 if self.op.disk_state:
4160 self.cluster.disk_state_static = self.new_disk_state
4162 if self.op.candidate_pool_size is not None:
4163 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4164 # we need to update the pool size here, otherwise the save will fail
4165 _AdjustCandidatePool(self, [])
4167 if self.op.maintain_node_health is not None:
4168 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4169 feedback_fn("Note: CONFD was disabled at build time, node health"
4170 " maintenance is not useful (still enabling it)")
4171 self.cluster.maintain_node_health = self.op.maintain_node_health
4173 if self.op.prealloc_wipe_disks is not None:
4174 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4176 if self.op.add_uids is not None:
4177 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4179 if self.op.remove_uids is not None:
4180 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4182 if self.op.uid_pool is not None:
4183 self.cluster.uid_pool = self.op.uid_pool
4185 if self.op.default_iallocator is not None:
4186 self.cluster.default_iallocator = self.op.default_iallocator
4188 if self.op.reserved_lvs is not None:
4189 self.cluster.reserved_lvs = self.op.reserved_lvs
4191 if self.op.use_external_mip_script is not None:
4192 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4194 def helper_os(aname, mods, desc):
4196 lst = getattr(self.cluster, aname)
4197 for key, val in mods:
4198 if key == constants.DDM_ADD:
4200 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4203 elif key == constants.DDM_REMOVE:
4207 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4209 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4211 if self.op.hidden_os:
4212 helper_os("hidden_os", self.op.hidden_os, "hidden")
4214 if self.op.blacklisted_os:
4215 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4217 if self.op.master_netdev:
4218 master_params = self.cfg.GetMasterNetworkParameters()
4219 ems = self.cfg.GetUseExternalMipScript()
4220 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4221 self.cluster.master_netdev)
4222 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4224 result.Raise("Could not disable the master ip")
4225 feedback_fn("Changing master_netdev from %s to %s" %
4226 (master_params.netdev, self.op.master_netdev))
4227 self.cluster.master_netdev = self.op.master_netdev
4229 if self.op.master_netmask:
4230 master_params = self.cfg.GetMasterNetworkParameters()
4231 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4232 result = self.rpc.call_node_change_master_netmask(master_params.name,
4233 master_params.netmask,
4234 self.op.master_netmask,
4236 master_params.netdev)
4238 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4241 self.cluster.master_netmask = self.op.master_netmask
4243 self.cfg.Update(self.cluster, feedback_fn)
4245 if self.op.master_netdev:
4246 master_params = self.cfg.GetMasterNetworkParameters()
4247 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4248 self.op.master_netdev)
4249 ems = self.cfg.GetUseExternalMipScript()
4250 result = self.rpc.call_node_activate_master_ip(master_params.name,
4253 self.LogWarning("Could not re-enable the master ip on"
4254 " the master, please restart manually: %s",
4258 def _UploadHelper(lu, nodes, fname):
4259 """Helper for uploading a file and showing warnings.
4262 if os.path.exists(fname):
4263 result = lu.rpc.call_upload_file(nodes, fname)
4264 for to_node, to_result in result.items():
4265 msg = to_result.fail_msg
4267 msg = ("Copy of file %s to node %s failed: %s" %
4268 (fname, to_node, msg))
4269 lu.proc.LogWarning(msg)
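# A small sketch of how _UploadHelper is typically driven (illustrative
# only; the function below is hypothetical and never called from this
# module).  It mirrors what LUClusterRename does after rewriting the
# known_hosts file: push the file to every online node except the master.
def _ExampleUploadKnownHosts(lu):
  """Illustrative only: redistribute known_hosts to all online nodes.

  """
  node_list = lu.cfg.GetOnlineNodeList()
  master_name = lu.cfg.GetMasterNode()
  if master_name in node_list:
    node_list.remove(master_name)
  _UploadHelper(lu, node_list, constants.SSH_KNOWN_HOSTS_FILE)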
4272 def _ComputeAncillaryFiles(cluster, redist):
4273 """Compute files external to Ganeti which need to be consistent.
4275 @type redist: boolean
4276 @param redist: Whether to include files which need to be redistributed
4279 # Compute files for all nodes
4281 constants.SSH_KNOWN_HOSTS_FILE,
4282 constants.CONFD_HMAC_KEY,
4283 constants.CLUSTER_DOMAIN_SECRET_FILE,
4284 constants.SPICE_CERT_FILE,
4285 constants.SPICE_CACERT_FILE,
4286 constants.RAPI_USERS_FILE,
4290 files_all.update(constants.ALL_CERT_FILES)
4291 files_all.update(ssconf.SimpleStore().GetFileList())
4293 # we need to ship at least the RAPI certificate
4294 files_all.add(constants.RAPI_CERT_FILE)
4296 if cluster.modify_etc_hosts:
4297 files_all.add(constants.ETC_HOSTS)
4299 if cluster.use_external_mip_script:
4300 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4302 # Files which are optional; these must:
4303 # - be present in one other category as well
4304 # - either exist or not exist on all nodes of that category (mc, vm all)
4306 constants.RAPI_USERS_FILE,
4309 # Files which should only be on master candidates
4313 files_mc.add(constants.CLUSTER_CONF_FILE)
4315 # Files which should only be on VM-capable nodes
4318 for hv_name in cluster.enabled_hypervisors
4319 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4323 for hv_name in cluster.enabled_hypervisors
4324 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4326 # Filenames in each category must be unique
4327 all_files_set = files_all | files_mc | files_vm
4328 assert (len(all_files_set) ==
4329 sum(map(len, [files_all, files_mc, files_vm]))), \
4330 "Found file listed in more than one file list"
4332 # Optional files must be present in one other category
4333 assert all_files_set.issuperset(files_opt), \
4334 "Optional file not in a different required list"
4336 return (files_all, files_opt, files_mc, files_vm)
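# Sketch of how the four categories returned above can be combined for a
# single node (illustrative only; the helper below is hypothetical).  Every
# node receives files_all, master candidates additionally receive files_mc,
# vm-capable nodes additionally receive files_vm; files_opt merely marks
# entries that may legitimately be absent.
def _ExampleNodeFileSet(cluster, master_candidate, vm_capable):
  """Illustrative only: files a single node is expected to hold.

  """
  (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)
  wanted = set(files_all)
  if master_candidate:
    wanted.update(files_mc)
  if vm_capable:
    wanted.update(files_vm)
  return wanted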
4339 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4340 """Distribute additional files which are part of the cluster configuration.
4342 ConfigWriter takes care of distributing the config and ssconf files, but
4343 there are more files which should be distributed to all nodes. This function
4344 makes sure those are copied.
4346 @param lu: calling logical unit
4347 @param additional_nodes: list of nodes not in the config to distribute to
4348 @type additional_vm: boolean
4349 @param additional_vm: whether the additional nodes are vm-capable or not
4352 # Gather target nodes
4353 cluster = lu.cfg.GetClusterInfo()
4354 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4356 online_nodes = lu.cfg.GetOnlineNodeList()
4357 online_set = frozenset(online_nodes)
4358 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4360 if additional_nodes is not None:
4361 online_nodes.extend(additional_nodes)
4363 vm_nodes.extend(additional_nodes)
4365 # Never distribute to master node
4366 for nodelist in [online_nodes, vm_nodes]:
4367 if master_info.name in nodelist:
4368 nodelist.remove(master_info.name)
4371 (files_all, _, files_mc, files_vm) = \
4372 _ComputeAncillaryFiles(cluster, True)
4374 # Never re-distribute configuration file from here
4375 assert not (constants.CLUSTER_CONF_FILE in files_all or
4376 constants.CLUSTER_CONF_FILE in files_vm)
4377 assert not files_mc, "Master candidates not handled in this function"
4380 (online_nodes, files_all),
4381 (vm_nodes, files_vm),
4385 for (node_list, files) in filemap:
4387 _UploadHelper(lu, node_list, fname)
4390 class LUClusterRedistConf(NoHooksLU):
4391 """Force the redistribution of cluster configuration.
4393 This is a very simple LU.
4398 def ExpandNames(self):
4399 self.needed_locks = {
4400 locking.LEVEL_NODE: locking.ALL_SET,
4402 self.share_locks[locking.LEVEL_NODE] = 1
4404 def Exec(self, feedback_fn):
4405 """Redistribute the configuration.
4408 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4409 _RedistributeAncillaryFiles(self)
4412 class LUClusterActivateMasterIp(NoHooksLU):
4413 """Activate the master IP on the master node.
4416 def Exec(self, feedback_fn):
4417 """Activate the master IP.
4420 master_params = self.cfg.GetMasterNetworkParameters()
4421 ems = self.cfg.GetUseExternalMipScript()
4422 result = self.rpc.call_node_activate_master_ip(master_params.name,
4424 result.Raise("Could not activate the master IP")
4427 class LUClusterDeactivateMasterIp(NoHooksLU):
4428 """Deactivate the master IP on the master node.
4431 def Exec(self, feedback_fn):
4432 """Deactivate the master IP.
4435 master_params = self.cfg.GetMasterNetworkParameters()
4436 ems = self.cfg.GetUseExternalMipScript()
4437 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4439 result.Raise("Could not deactivate the master IP")
4442 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4443 """Sleep and poll for an instance's disk to sync.
4446 if not instance.disks or (disks is not None and not disks):
4449 disks = _ExpandCheckDisks(instance, disks)
4452 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4454 node = instance.primary_node
4457 lu.cfg.SetDiskID(dev, node)
4459 # TODO: Convert to utils.Retry
4462 degr_retries = 10 # in seconds, as we sleep 1 second each time
4466 cumul_degraded = False
4467 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4468 msg = rstats.fail_msg
4470 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4473 raise errors.RemoteError("Can't contact node %s for mirror data,"
4474 " aborting." % node)
4477 rstats = rstats.payload
4479 for i, mstat in enumerate(rstats):
4481 lu.LogWarning("Can't compute data for node %s/%s",
4482 node, disks[i].iv_name)
4485 cumul_degraded = (cumul_degraded or
4486 (mstat.is_degraded and mstat.sync_percent is None))
4487 if mstat.sync_percent is not None:
4489 if mstat.estimated_time is not None:
4490 rem_time = ("%s remaining (estimated)" %
4491 utils.FormatSeconds(mstat.estimated_time))
4492 max_time = mstat.estimated_time
4494 rem_time = "no time estimate"
4495 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4496 (disks[i].iv_name, mstat.sync_percent, rem_time))
4498 # if we're done but degraded, let's do a few small retries, to
4499 # make sure we see a stable and not transient situation; therefore
4500 # we force restart of the loop
4501 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4502 logging.info("Degraded disks found, %d retries left", degr_retries)
4510 time.sleep(min(60, max_time))
4513 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4514 return not cumul_degraded
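# Usage sketch (illustrative): instance LUs typically call this right after
# creating or (re)activating disks, e.g.
#
#   disk_abort = not _WaitForSync(lu, instance)
#
# and treat a False return value (still degraded after the retries above)
# as a reason to warn or abort the operation.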
4517 def _BlockdevFind(lu, node, dev, instance):
4518 """Wrapper around call_blockdev_find to annotate diskparams.
4520 @param lu: A reference to the lu object
4521 @param node: The node to call out to
4522 @param dev: The device to find
4523 @param instance: The instance object the device belongs to
4524 @returns The result of the rpc call
4527 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4528 return lu.rpc.call_blockdev_find(node, disk)
4531 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4532 """Wrapper around L{_CheckDiskConsistencyInner}.
4535 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4536 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4540 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4542 """Check that mirrors are not degraded.
4544 @attention: The device has to be annotated already.
4546 The ldisk parameter, if True, will change the test from the
4547 is_degraded attribute (which represents overall non-ok status for
4548 the device(s)) to the ldisk (representing the local storage status).
4551 lu.cfg.SetDiskID(dev, node)
4555 if on_primary or dev.AssembleOnSecondary():
4556 rstats = lu.rpc.call_blockdev_find(node, dev)
4557 msg = rstats.fail_msg
4559 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4561 elif not rstats.payload:
4562 lu.LogWarning("Can't find disk on node %s", node)
4566 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4568 result = result and not rstats.payload.is_degraded
4571 for child in dev.children:
4572 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
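# Usage sketch (illustrative): callers that only care about the local
# storage status of a DRBD device, e.g. before failing over to the
# secondary node, pass ldisk=True so the check looks at ldisk_status
# instead of the overall is_degraded flag:
#
#   ok = _CheckDiskConsistency(lu, instance, dev, secondary_node,
#                              on_primary=False, ldisk=True)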
4578 class LUOobCommand(NoHooksLU):
4579 """Logical unit for OOB handling.
4583 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4585 def ExpandNames(self):
4586 """Gather locks we need.
4589 if self.op.node_names:
4590 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4591 lock_names = self.op.node_names
4593 lock_names = locking.ALL_SET
4595 self.needed_locks = {
4596 locking.LEVEL_NODE: lock_names,
4599 def CheckPrereq(self):
4600 """Check prerequisites.
4603 - the node exists in the configuration
4606 Any errors are signaled by raising errors.OpPrereqError.
4610 self.master_node = self.cfg.GetMasterNode()
4612 assert self.op.power_delay >= 0.0
4614 if self.op.node_names:
4615 if (self.op.command in self._SKIP_MASTER and
4616 self.master_node in self.op.node_names):
4617 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4618 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4620 if master_oob_handler:
4621 additional_text = ("run '%s %s %s' if you want to operate on the"
4622 " master regardless") % (master_oob_handler,
4626 additional_text = "it does not support out-of-band operations"
4628 raise errors.OpPrereqError(("Operating on the master node %s is not"
4629 " allowed for %s; %s") %
4630 (self.master_node, self.op.command,
4631 additional_text), errors.ECODE_INVAL)
4633 self.op.node_names = self.cfg.GetNodeList()
4634 if self.op.command in self._SKIP_MASTER:
4635 self.op.node_names.remove(self.master_node)
4637 if self.op.command in self._SKIP_MASTER:
4638 assert self.master_node not in self.op.node_names
4640 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4642 raise errors.OpPrereqError("Node %s not found" % node_name,
4645 self.nodes.append(node)
4647 if (not self.op.ignore_status and
4648 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4649 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4650 " not marked offline") % node_name,
4653 def Exec(self, feedback_fn):
4654 """Execute OOB and return result if we expect any.
4657 master_node = self.master_node
4660 for idx, node in enumerate(utils.NiceSort(self.nodes,
4661 key=lambda node: node.name)):
4662 node_entry = [(constants.RS_NORMAL, node.name)]
4663 ret.append(node_entry)
4665 oob_program = _SupportsOob(self.cfg, node)
4668 node_entry.append((constants.RS_UNAVAIL, None))
4671 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4672 self.op.command, oob_program, node.name)
4673 result = self.rpc.call_run_oob(master_node, oob_program,
4674 self.op.command, node.name,
4678 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4679 node.name, result.fail_msg)
4680 node_entry.append((constants.RS_NODATA, None))
4683 self._CheckPayload(result)
4684 except errors.OpExecError, err:
4685 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4687 node_entry.append((constants.RS_NODATA, None))
4689 if self.op.command == constants.OOB_HEALTH:
4690 # For health we should log important events
4691 for item, status in result.payload:
4692 if status in [constants.OOB_STATUS_WARNING,
4693 constants.OOB_STATUS_CRITICAL]:
4694 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4695 item, node.name, status)
4697 if self.op.command == constants.OOB_POWER_ON:
4699 elif self.op.command == constants.OOB_POWER_OFF:
4700 node.powered = False
4701 elif self.op.command == constants.OOB_POWER_STATUS:
4702 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4703 if powered != node.powered:
4704 logging.warning(("Recorded power state (%s) of node '%s' does not"
4705 " match actual power state (%s)"), node.powered,
4708 # For configuration changing commands we should update the node
4709 if self.op.command in (constants.OOB_POWER_ON,
4710 constants.OOB_POWER_OFF):
4711 self.cfg.Update(node, feedback_fn)
4713 node_entry.append((constants.RS_NORMAL, result.payload))
4715 if (self.op.command == constants.OOB_POWER_ON and
4716 idx < len(self.nodes) - 1):
4717 time.sleep(self.op.power_delay)
4721 def _CheckPayload(self, result):
4722 """Checks if the payload is valid.
4724 @param result: RPC result
4725 @raises errors.OpExecError: If payload is not valid
4729 if self.op.command == constants.OOB_HEALTH:
4730 if not isinstance(result.payload, list):
4731 errs.append("command 'health' is expected to return a list but got %s" %
4732 type(result.payload))
4734 for item, status in result.payload:
4735 if status not in constants.OOB_STATUSES:
4736 errs.append("health item '%s' has invalid status '%s'" %
4739 if self.op.command == constants.OOB_POWER_STATUS:
4740 if not isinstance(result.payload, dict):
4741 errs.append("power-status is expected to return a dict but got %s" %
4742 type(result.payload))
4744 if self.op.command in [
4745 constants.OOB_POWER_ON,
4746 constants.OOB_POWER_OFF,
4747 constants.OOB_POWER_CYCLE,
4749 if result.payload is not None:
4750 errs.append("%s is expected to not return payload but got '%s'" %
4751 (self.op.command, result.payload))
4754 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4755 utils.CommaJoin(errs))
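# Payload shapes accepted by _CheckPayload (values illustrative only; the
# exact status constants are assumed to follow the OOB_STATUS_* naming used
# elsewhere in constants):
#
#   constants.OOB_HEALTH:       [["PSU1", constants.OOB_STATUS_OK],
#                                ["fan3", constants.OOB_STATUS_WARNING]]
#   constants.OOB_POWER_STATUS: {constants.OOB_POWER_STATUS_POWERED: True}
#   power on/off/cycle:         no payload at all (None)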
4758 class _OsQuery(_QueryBase):
4759 FIELDS = query.OS_FIELDS
4761 def ExpandNames(self, lu):
4762 # Lock all nodes in shared mode
4763 # Temporary removal of locks, should be reverted later
4764 # TODO: reintroduce locks when they are lighter-weight
4765 lu.needed_locks = {}
4766 #self.share_locks[locking.LEVEL_NODE] = 1
4767 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4769 # The following variables interact with _QueryBase._GetNames
4771 self.wanted = self.names
4773 self.wanted = locking.ALL_SET
4775 self.do_locking = self.use_locking
4777 def DeclareLocks(self, lu, level):
4781 def _DiagnoseByOS(rlist):
4782 """Remaps a per-node return list into an a per-os per-node dictionary
4784 @param rlist: a map with node names as keys and OS objects as values
4787 @return: a dictionary with osnames as keys and as value another
4788 map, with nodes as keys and tuples of (path, status, diagnose,
4789 variants, parameters, api_versions) as values, eg::
4791 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4792 (/srv/..., False, "invalid api")],
4793 "node2": [(/srv/..., True, "", [], [])]}
4798 # we build here the list of nodes that didn't fail the RPC (at RPC
4799 # level), so that nodes with a non-responding node daemon don't
4800 # make all OSes invalid
4801 good_nodes = [node_name for node_name in rlist
4802 if not rlist[node_name].fail_msg]
4803 for node_name, nr in rlist.items():
4804 if nr.fail_msg or not nr.payload:
4806 for (name, path, status, diagnose, variants,
4807 params, api_versions) in nr.payload:
4808 if name not in all_os:
4809 # build a list of nodes for this os containing empty lists
4810 # for each node in node_list
4812 for nname in good_nodes:
4813 all_os[name][nname] = []
4814 # convert params from [name, help] to (name, help)
4815 params = [tuple(v) for v in params]
4816 all_os[name][node_name].append((path, status, diagnose,
4817 variants, params, api_versions))
4820 def _GetQueryData(self, lu):
4821 """Computes the list of nodes and their attributes.
4824 # Locking is not used
4825 assert not (compat.any(lu.glm.is_owned(level)
4826 for level in locking.LEVELS
4827 if level != locking.LEVEL_CLUSTER) or
4828 self.do_locking or self.use_locking)
4830 valid_nodes = [node.name
4831 for node in lu.cfg.GetAllNodesInfo().values()
4832 if not node.offline and node.vm_capable]
4833 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4834 cluster = lu.cfg.GetClusterInfo()
4838 for (os_name, os_data) in pol.items():
4839 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4840 hidden=(os_name in cluster.hidden_os),
4841 blacklisted=(os_name in cluster.blacklisted_os))
4845 api_versions = set()
4847 for idx, osl in enumerate(os_data.values()):
4848 info.valid = bool(info.valid and osl and osl[0][1])
4852 (node_variants, node_params, node_api) = osl[0][3:6]
4855 variants.update(node_variants)
4856 parameters.update(node_params)
4857 api_versions.update(node_api)
4859 # Filter out inconsistent values
4860 variants.intersection_update(node_variants)
4861 parameters.intersection_update(node_params)
4862 api_versions.intersection_update(node_api)
4864 info.variants = list(variants)
4865 info.parameters = list(parameters)
4866 info.api_versions = list(api_versions)
4868 data[os_name] = info
4870 # Prepare data in requested order
4871 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4875 class LUOsDiagnose(NoHooksLU):
4876 """Logical unit for OS diagnose/query.
4882 def _BuildFilter(fields, names):
4883 """Builds a filter for querying OSes.
4886 name_filter = qlang.MakeSimpleFilter("name", names)
4888 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4889 # respective field is not requested
4890 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4891 for fname in ["hidden", "blacklisted"]
4892 if fname not in fields]
4893 if "valid" not in fields:
4894 status_filter.append([qlang.OP_TRUE, "valid"])
4897 status_filter.insert(0, qlang.OP_AND)
4899 status_filter = None
4901 if name_filter and status_filter:
4902 return [qlang.OP_AND, name_filter, status_filter]
4906 return status_filter
4908 def CheckArguments(self):
4909 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4910 self.op.output_fields, False)
4912 def ExpandNames(self):
4913 self.oq.ExpandNames(self)
4915 def Exec(self, feedback_fn):
4916 return self.oq.OldStyleQuery(self)
4919 class LUNodeRemove(LogicalUnit):
4920 """Logical unit for removing a node.
4923 HPATH = "node-remove"
4924 HTYPE = constants.HTYPE_NODE
4926 def BuildHooksEnv(self):
4931 "OP_TARGET": self.op.node_name,
4932 "NODE_NAME": self.op.node_name,
4935 def BuildHooksNodes(self):
4936 """Build hooks nodes.
4938 This doesn't run on the target node in the pre phase as a failed
4939 node would then be impossible to remove.
4942 all_nodes = self.cfg.GetNodeList()
4944 all_nodes.remove(self.op.node_name)
4947 return (all_nodes, all_nodes)
4949 def CheckPrereq(self):
4950 """Check prerequisites.
4953 - the node exists in the configuration
4954 - it does not have primary or secondary instances
4955 - it's not the master
4957 Any errors are signaled by raising errors.OpPrereqError.
4960 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4961 node = self.cfg.GetNodeInfo(self.op.node_name)
4962 assert node is not None
4964 masternode = self.cfg.GetMasterNode()
4965 if node.name == masternode:
4966 raise errors.OpPrereqError("Node is the master node, failover to another"
4967 " node is required", errors.ECODE_INVAL)
4969 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4970 if node.name in instance.all_nodes:
4971 raise errors.OpPrereqError("Instance %s is still running on the node,"
4972 " please remove first" % instance_name,
4974 self.op.node_name = node.name
4977 def Exec(self, feedback_fn):
4978 """Removes the node from the cluster.
4982 logging.info("Stopping the node daemon and removing configs from node %s",
4985 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4987 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4990 # Promote nodes to master candidate as needed
4991 _AdjustCandidatePool(self, exceptions=[node.name])
4992 self.context.RemoveNode(node.name)
4994 # Run post hooks on the node before it's removed
4995 _RunPostHook(self, node.name)
4997 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4998 msg = result.fail_msg
5000 self.LogWarning("Errors encountered on the remote node while leaving"
5001 " the cluster: %s", msg)
5003 # Remove node from our /etc/hosts
5004 if self.cfg.GetClusterInfo().modify_etc_hosts:
5005 master_node = self.cfg.GetMasterNode()
5006 result = self.rpc.call_etc_hosts_modify(master_node,
5007 constants.ETC_HOSTS_REMOVE,
5009 result.Raise("Can't update hosts file with new host data")
5010 _RedistributeAncillaryFiles(self)
5013 class _NodeQuery(_QueryBase):
5014 FIELDS = query.NODE_FIELDS
5016 def ExpandNames(self, lu):
5017 lu.needed_locks = {}
5018 lu.share_locks = _ShareAll()
5021 self.wanted = _GetWantedNodes(lu, self.names)
5023 self.wanted = locking.ALL_SET
5025 self.do_locking = (self.use_locking and
5026 query.NQ_LIVE in self.requested_data)
5029 # If any non-static field is requested we need to lock the nodes
5030 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5032 def DeclareLocks(self, lu, level):
5035 def _GetQueryData(self, lu):
5036 """Computes the list of nodes and their attributes.
5039 all_info = lu.cfg.GetAllNodesInfo()
5041 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5043 # Gather data as requested
5044 if query.NQ_LIVE in self.requested_data:
5045 # filter out non-vm_capable nodes
5046 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5048 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5049 [lu.cfg.GetHypervisorType()])
5050 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5051 for (name, nresult) in node_data.items()
5052 if not nresult.fail_msg and nresult.payload)
5056 if query.NQ_INST in self.requested_data:
5057 node_to_primary = dict([(name, set()) for name in nodenames])
5058 node_to_secondary = dict([(name, set()) for name in nodenames])
5060 inst_data = lu.cfg.GetAllInstancesInfo()
5062 for inst in inst_data.values():
5063 if inst.primary_node in node_to_primary:
5064 node_to_primary[inst.primary_node].add(inst.name)
5065 for secnode in inst.secondary_nodes:
5066 if secnode in node_to_secondary:
5067 node_to_secondary[secnode].add(inst.name)
5069 node_to_primary = None
5070 node_to_secondary = None
5072 if query.NQ_OOB in self.requested_data:
5073 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5074 for name, node in all_info.iteritems())
5078 if query.NQ_GROUP in self.requested_data:
5079 groups = lu.cfg.GetAllNodeGroupsInfo()
5083 return query.NodeQueryData([all_info[name] for name in nodenames],
5084 live_data, lu.cfg.GetMasterNode(),
5085 node_to_primary, node_to_secondary, groups,
5086 oob_support, lu.cfg.GetClusterInfo())
5089 class LUNodeQuery(NoHooksLU):
5090 """Logical unit for querying nodes.
5093 # pylint: disable=W0142
5096 def CheckArguments(self):
5097 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5098 self.op.output_fields, self.op.use_locking)
5100 def ExpandNames(self):
5101 self.nq.ExpandNames(self)
5103 def DeclareLocks(self, level):
5104 self.nq.DeclareLocks(self, level)
5106 def Exec(self, feedback_fn):
5107 return self.nq.OldStyleQuery(self)
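# Usage sketch (illustrative): this LU backs the old-style node listing and
# is normally reached through an opcode along the lines of
#
#   opcodes.OpNodeQuery(output_fields=["name", "pip", "sip"], names=[],
#                       use_locking=False)
#
# An empty names list means "all nodes"; use_locking only has an effect
# when live data (query.NQ_LIVE fields) is requested.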
5110 class LUNodeQueryvols(NoHooksLU):
5111 """Logical unit for getting volumes on node(s).
5115 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5116 _FIELDS_STATIC = utils.FieldSet("node")
5118 def CheckArguments(self):
5119 _CheckOutputFields(static=self._FIELDS_STATIC,
5120 dynamic=self._FIELDS_DYNAMIC,
5121 selected=self.op.output_fields)
5123 def ExpandNames(self):
5124 self.share_locks = _ShareAll()
5125 self.needed_locks = {}
5127 if not self.op.nodes:
5128 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5130 self.needed_locks[locking.LEVEL_NODE] = \
5131 _GetWantedNodes(self, self.op.nodes)
5133 def Exec(self, feedback_fn):
5134 """Computes the list of nodes and their attributes.
5137 nodenames = self.owned_locks(locking.LEVEL_NODE)
5138 volumes = self.rpc.call_node_volumes(nodenames)
5140 ilist = self.cfg.GetAllInstancesInfo()
5141 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5144 for node in nodenames:
5145 nresult = volumes[node]
5148 msg = nresult.fail_msg
5150 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5153 node_vols = sorted(nresult.payload,
5154 key=operator.itemgetter("dev"))
5156 for vol in node_vols:
5158 for field in self.op.output_fields:
5161 elif field == "phys":
5165 elif field == "name":
5167 elif field == "size":
5168 val = int(float(vol["size"]))
5169 elif field == "instance":
5170 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5172 raise errors.ParameterError(field)
5173 node_output.append(str(val))
5175 output.append(node_output)
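# Output sketch (illustrative): with output_fields ["node", "name", "size",
# "instance"] every entry appended above is a list of strings, e.g.
#
#   ["node1.example.com", "disk0", "10240", "instance1.example.com"]
#
# with "-" in the instance column for volumes not belonging to any instance.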
5180 class LUNodeQueryStorage(NoHooksLU):
5181 """Logical unit for getting information on storage units on node(s).
5184 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5187 def CheckArguments(self):
5188 _CheckOutputFields(static=self._FIELDS_STATIC,
5189 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5190 selected=self.op.output_fields)
5192 def ExpandNames(self):
5193 self.share_locks = _ShareAll()
5194 self.needed_locks = {}
5197 self.needed_locks[locking.LEVEL_NODE] = \
5198 _GetWantedNodes(self, self.op.nodes)
5200 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5202 def Exec(self, feedback_fn):
5203 """Computes the list of nodes and their attributes.
5206 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5208 # Always get name to sort by
5209 if constants.SF_NAME in self.op.output_fields:
5210 fields = self.op.output_fields[:]
5212 fields = [constants.SF_NAME] + self.op.output_fields
5214 # Never ask for node or type as it's only known to the LU
5215 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5216 while extra in fields:
5217 fields.remove(extra)
5219 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5220 name_idx = field_idx[constants.SF_NAME]
5222 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5223 data = self.rpc.call_storage_list(self.nodes,
5224 self.op.storage_type, st_args,
5225 self.op.name, fields)
5229 for node in utils.NiceSort(self.nodes):
5230 nresult = data[node]
5234 msg = nresult.fail_msg
5236 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5239 rows = dict([(row[name_idx], row) for row in nresult.payload])
5241 for name in utils.NiceSort(rows.keys()):
5246 for field in self.op.output_fields:
5247 if field == constants.SF_NODE:
5249 elif field == constants.SF_TYPE:
5250 val = self.op.storage_type
5251 elif field in field_idx:
5252 val = row[field_idx[field]]
5254 raise errors.ParameterError(field)
5263 class _InstanceQuery(_QueryBase):
5264 FIELDS = query.INSTANCE_FIELDS
5266 def ExpandNames(self, lu):
5267 lu.needed_locks = {}
5268 lu.share_locks = _ShareAll()
5271 self.wanted = _GetWantedInstances(lu, self.names)
5273 self.wanted = locking.ALL_SET
5275 self.do_locking = (self.use_locking and
5276 query.IQ_LIVE in self.requested_data)
5278 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5279 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5280 lu.needed_locks[locking.LEVEL_NODE] = []
5281 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5283 self.do_grouplocks = (self.do_locking and
5284 query.IQ_NODES in self.requested_data)
5286 def DeclareLocks(self, lu, level):
5288 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5289 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5291 # Lock all groups used by instances optimistically; this requires going
5292 # via the node before it's locked, requiring verification later on
5293 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5295 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5296 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5297 elif level == locking.LEVEL_NODE:
5298 lu._LockInstancesNodes() # pylint: disable=W0212
5301 def _CheckGroupLocks(lu):
5302 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5303 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5305 # Check if node groups for locked instances are still correct
5306 for instance_name in owned_instances:
5307 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5309 def _GetQueryData(self, lu):
5310 """Computes the list of instances and their attributes.
5313 if self.do_grouplocks:
5314 self._CheckGroupLocks(lu)
5316 cluster = lu.cfg.GetClusterInfo()
5317 all_info = lu.cfg.GetAllInstancesInfo()
5319 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5321 instance_list = [all_info[name] for name in instance_names]
5322 nodes = frozenset(itertools.chain(*(inst.all_nodes
5323 for inst in instance_list)))
5324 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5327 wrongnode_inst = set()
5329 # Gather data as requested
5330 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5332 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5334 result = node_data[name]
5336 # offline nodes will be in both lists
5337 assert result.fail_msg
5338 offline_nodes.append(name)
5340 bad_nodes.append(name)
5341 elif result.payload:
5342 for inst in result.payload:
5343 if inst in all_info:
5344 if all_info[inst].primary_node == name:
5345 live_data.update(result.payload)
5347 wrongnode_inst.add(inst)
5349 # orphan instance; we don't list it here as we don't
5350 # handle this case yet in the output of instance listing
5351 logging.warning("Orphan instance '%s' found on node %s",
5353 # else no instance is alive
5357 if query.IQ_DISKUSAGE in self.requested_data:
5358 gmi = ganeti.masterd.instance
5359 disk_usage = dict((inst.name,
5360 gmi.ComputeDiskSize(inst.disk_template,
5361 [{constants.IDISK_SIZE: disk.size}
5362 for disk in inst.disks]))
5363 for inst in instance_list)
5367 if query.IQ_CONSOLE in self.requested_data:
5369 for inst in instance_list:
5370 if inst.name in live_data:
5371 # Instance is running
5372 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5374 consinfo[inst.name] = None
5375 assert set(consinfo.keys()) == set(instance_names)
5379 if query.IQ_NODES in self.requested_data:
5380 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5382 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5383 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5384 for uuid in set(map(operator.attrgetter("group"),
5390 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5391 disk_usage, offline_nodes, bad_nodes,
5392 live_data, wrongnode_inst, consinfo,
5396 class LUQuery(NoHooksLU):
5397 """Query for resources/items of a certain kind.
5400 # pylint: disable=W0142
5403 def CheckArguments(self):
5404 qcls = _GetQueryImplementation(self.op.what)
5406 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5408 def ExpandNames(self):
5409 self.impl.ExpandNames(self)
5411 def DeclareLocks(self, level):
5412 self.impl.DeclareLocks(self, level)
5414 def Exec(self, feedback_fn):
5415 return self.impl.NewStyleQuery(self)
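# Usage sketch (illustrative): new-style queries specify a resource kind, a
# field list and an optional qlang filter; assuming the usual opcode and
# constant names in this codebase, a filtered node query would look
# roughly like
#
#   opcodes.OpQuery(what=constants.QR_NODE,
#                   fields=["name", "master_candidate"],
#                   qfilter=qlang.MakeSimpleFilter("name", ["node1"]))
#
# _GetQueryImplementation then maps the "what" value to the matching
# _QueryBase subclass (here _NodeQuery).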
5418 class LUQueryFields(NoHooksLU):
5419 """Query for resources/items of a certain kind.
5422 # pylint: disable=W0142
5425 def CheckArguments(self):
5426 self.qcls = _GetQueryImplementation(self.op.what)
5428 def ExpandNames(self):
5429 self.needed_locks = {}
5431 def Exec(self, feedback_fn):
5432 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5435 class LUNodeModifyStorage(NoHooksLU):
5436 """Logical unit for modifying a storage volume on a node.
5441 def CheckArguments(self):
5442 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5444 storage_type = self.op.storage_type
5447 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5449 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5450 " modified" % storage_type,
5453 diff = set(self.op.changes.keys()) - modifiable
5455 raise errors.OpPrereqError("The following fields can not be modified for"
5456 " storage units of type '%s': %r" %
5457 (storage_type, list(diff)),
5460 def ExpandNames(self):
5461 self.needed_locks = {
5462 locking.LEVEL_NODE: self.op.node_name,
5465 def Exec(self, feedback_fn):
5466 """Computes the list of nodes and their attributes.
5469 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5470 result = self.rpc.call_storage_modify(self.op.node_name,
5471 self.op.storage_type, st_args,
5472 self.op.name, self.op.changes)
5473 result.Raise("Failed to modify storage unit '%s' on %s" %
5474 (self.op.name, self.op.node_name))
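# Usage sketch (illustrative): for storage types that allow modification,
# self.op.changes is a dict of field updates; marking an LVM physical
# volume as non-allocatable would use something like
#
#   {constants.SF_ALLOCATABLE: False}
#
# with the LVM PV storage type; any field outside
# constants.MODIFIABLE_STORAGE_FIELDS for that type is rejected in
# CheckArguments above.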
5477 class LUNodeAdd(LogicalUnit):
5478 """Logical unit for adding node to the cluster.
5482 HTYPE = constants.HTYPE_NODE
5483 _NFLAGS = ["master_capable", "vm_capable"]
5485 def CheckArguments(self):
5486 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5487 # validate/normalize the node name
5488 self.hostname = netutils.GetHostname(name=self.op.node_name,
5489 family=self.primary_ip_family)
5490 self.op.node_name = self.hostname.name
5492 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5493 raise errors.OpPrereqError("Cannot readd the master node",
5496 if self.op.readd and self.op.group:
5497 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5498 " being readded", errors.ECODE_INVAL)
5500 def BuildHooksEnv(self):
5503 This will run on all nodes before, and on all nodes + the new node after.
5507 "OP_TARGET": self.op.node_name,
5508 "NODE_NAME": self.op.node_name,
5509 "NODE_PIP": self.op.primary_ip,
5510 "NODE_SIP": self.op.secondary_ip,
5511 "MASTER_CAPABLE": str(self.op.master_capable),
5512 "VM_CAPABLE": str(self.op.vm_capable),
5515 def BuildHooksNodes(self):
5516 """Build hooks nodes.
5519 # Exclude added node
5520 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5521 post_nodes = pre_nodes + [self.op.node_name, ]
5523 return (pre_nodes, post_nodes)
5525 def CheckPrereq(self):
5526 """Check prerequisites.
5529 - the new node is not already in the config
5531 - its parameters (single/dual homed) match the cluster
5533 Any errors are signaled by raising errors.OpPrereqError.
5537 hostname = self.hostname
5538 node = hostname.name
5539 primary_ip = self.op.primary_ip = hostname.ip
5540 if self.op.secondary_ip is None:
5541 if self.primary_ip_family == netutils.IP6Address.family:
5542 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5543 " IPv4 address must be given as secondary",
5545 self.op.secondary_ip = primary_ip
5547 secondary_ip = self.op.secondary_ip
5548 if not netutils.IP4Address.IsValid(secondary_ip):
5549 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5550 " address" % secondary_ip, errors.ECODE_INVAL)
5552 node_list = cfg.GetNodeList()
5553 if not self.op.readd and node in node_list:
5554 raise errors.OpPrereqError("Node %s is already in the configuration" %
5555 node, errors.ECODE_EXISTS)
5556 elif self.op.readd and node not in node_list:
5557 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5560 self.changed_primary_ip = False
5562 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5563 if self.op.readd and node == existing_node_name:
5564 if existing_node.secondary_ip != secondary_ip:
5565 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5566 " address configuration as before",
5568 if existing_node.primary_ip != primary_ip:
5569 self.changed_primary_ip = True
5573 if (existing_node.primary_ip == primary_ip or
5574 existing_node.secondary_ip == primary_ip or
5575 existing_node.primary_ip == secondary_ip or
5576 existing_node.secondary_ip == secondary_ip):
5577 raise errors.OpPrereqError("New node ip address(es) conflict with"
5578 " existing node %s" % existing_node.name,
5579 errors.ECODE_NOTUNIQUE)
5581 # After this 'if' block, None is no longer a valid value for the
5582 # _capable op attributes
5584 old_node = self.cfg.GetNodeInfo(node)
5585 assert old_node is not None, "Can't retrieve locked node %s" % node
5586 for attr in self._NFLAGS:
5587 if getattr(self.op, attr) is None:
5588 setattr(self.op, attr, getattr(old_node, attr))
5590 for attr in self._NFLAGS:
5591 if getattr(self.op, attr) is None:
5592 setattr(self.op, attr, True)
5594 if self.op.readd and not self.op.vm_capable:
5595 pri, sec = cfg.GetNodeInstances(node)
5597 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5598 " flag set to false, but it already holds"
5599 " instances" % node,
5602 # check that the type of the node (single versus dual homed) is the
5603 # same as for the master
5604 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5605 master_singlehomed = myself.secondary_ip == myself.primary_ip
5606 newbie_singlehomed = secondary_ip == primary_ip
5607 if master_singlehomed != newbie_singlehomed:
5608 if master_singlehomed:
5609 raise errors.OpPrereqError("The master has no secondary ip but the"
5610 " new node has one",
5613 raise errors.OpPrereqError("The master has a secondary ip but the"
5614 " new node doesn't have one",
5617 # checks reachability
5618 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5619 raise errors.OpPrereqError("Node not reachable by ping",
5620 errors.ECODE_ENVIRON)
5622 if not newbie_singlehomed:
5623 # check reachability from my secondary ip to newbie's secondary ip
5624 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5625 source=myself.secondary_ip):
5626 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5627 " based ping to node daemon port",
5628 errors.ECODE_ENVIRON)
5635 if self.op.master_capable:
5636 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5638 self.master_candidate = False
5641 self.new_node = old_node
5643 node_group = cfg.LookupNodeGroup(self.op.group)
5644 self.new_node = objects.Node(name=node,
5645 primary_ip=primary_ip,
5646 secondary_ip=secondary_ip,
5647 master_candidate=self.master_candidate,
5648 offline=False, drained=False,
5651 if self.op.ndparams:
5652 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5654 if self.op.hv_state:
5655 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5657 if self.op.disk_state:
5658 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5660 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5661 # it a property on the base class.
5662 result = rpc.DnsOnlyRunner().call_version([node])[node]
5663 result.Raise("Can't get version information from node %s" % node)
5664 if constants.PROTOCOL_VERSION == result.payload:
5665 logging.info("Communication to node %s fine, sw version %s match",
5666 node, result.payload)
5667 else:
5668 raise errors.OpPrereqError("Version mismatch master version %s,"
5669 " node version %s" %
5670 (constants.PROTOCOL_VERSION, result.payload),
5671 errors.ECODE_ENVIRON)
5673 def Exec(self, feedback_fn):
5674 """Adds the new node to the cluster.
5677 new_node = self.new_node
5678 node = new_node.name
5680 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5683 # We are adding a new node, so we assume it's powered
5684 new_node.powered = True
5686 # for re-adds, reset the offline/drained/master-candidate flags;
5687 # we need to reset here, otherwise offline would prevent RPC calls
5688 # later in the procedure; this also means that if the re-add
5689 # fails, we are left with a non-offlined, broken node
5690 if self.op.readd:
5691 new_node.drained = new_node.offline = False # pylint: disable=W0201
5692 self.LogInfo("Readding a node, the offline/drained flags were reset")
5693 # if we demote the node, we do cleanup later in the procedure
5694 new_node.master_candidate = self.master_candidate
5695 if self.changed_primary_ip:
5696 new_node.primary_ip = self.op.primary_ip
5698 # copy the master/vm_capable flags
5699 for attr in self._NFLAGS:
5700 setattr(new_node, attr, getattr(self.op, attr))
5702 # notify the user about any possible mc promotion
5703 if new_node.master_candidate:
5704 self.LogInfo("Node will be a master candidate")
5706 if self.op.ndparams:
5707 new_node.ndparams = self.op.ndparams
5708 else:
5709 new_node.ndparams = {}
5711 if self.op.hv_state:
5712 new_node.hv_state_static = self.new_hv_state
5714 if self.op.disk_state:
5715 new_node.disk_state_static = self.new_disk_state
5717 # Add node to our /etc/hosts, and add key to known_hosts
5718 if self.cfg.GetClusterInfo().modify_etc_hosts:
5719 master_node = self.cfg.GetMasterNode()
5720 result = self.rpc.call_etc_hosts_modify(master_node,
5721 constants.ETC_HOSTS_ADD,
5722 self.hostname.name,
5723 self.hostname.ip)
5724 result.Raise("Can't update hosts file with new host data")
5726 if new_node.secondary_ip != new_node.primary_ip:
5727 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5730 node_verify_list = [self.cfg.GetMasterNode()]
5731 node_verify_param = {
5732 constants.NV_NODELIST: ([node], {}),
5733 # TODO: do a node-net-test as well?
5734 }
5736 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5737 self.cfg.GetClusterName())
5738 for verifier in node_verify_list:
5739 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5740 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5741 if nl_payload:
5742 for failed in nl_payload:
5743 feedback_fn("ssh/hostname verification failed"
5744 " (checking from %s): %s" %
5745 (verifier, nl_payload[failed]))
5746 raise errors.OpExecError("ssh/hostname verification failed")
5748 if self.op.readd:
5749 _RedistributeAncillaryFiles(self)
5750 self.context.ReaddNode(new_node)
5751 # make sure we redistribute the config
5752 self.cfg.Update(new_node, feedback_fn)
5753 # and make sure the new node will not have old files around
5754 if not new_node.master_candidate:
5755 result = self.rpc.call_node_demote_from_mc(new_node.name)
5756 msg = result.fail_msg
5757 if msg:
5758 self.LogWarning("Node failed to demote itself from master"
5759 " candidate status: %s" % msg)
5760 else:
5761 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5762 additional_vm=self.op.vm_capable)
5763 self.context.AddNode(new_node, self.proc.GetECId())
5766 class LUNodeSetParams(LogicalUnit):
5767 """Modifies the parameters of a node.
5769 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5770 to the node role (as _ROLE_*)
5771 @cvar _R2F: a dictionary from node role to tuples of flags
5772 @cvar _FLAGS: a list of attribute names corresponding to the flags
5775 HPATH = "node-modify"
5776 HTYPE = constants.HTYPE_NODE
5778 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5779 _F2R = {
5780 (True, False, False): _ROLE_CANDIDATE,
5781 (False, True, False): _ROLE_DRAINED,
5782 (False, False, True): _ROLE_OFFLINE,
5783 (False, False, False): _ROLE_REGULAR,
5784 }
5785 _R2F = dict((v, k) for k, v in _F2R.items())
5786 _FLAGS = ["master_candidate", "drained", "offline"]
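# The three tables above form a bijection between flag combinations and node
# roles: for example _F2R[(True, False, False)] is _ROLE_CANDIDATE and
# _R2F[_ROLE_OFFLINE] is (False, False, True).  CheckPrereq translates the
# current master_candidate/drained/offline attributes (the names in _FLAGS)
# into a role, and Exec translates the computed new role back into the flags.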
5788 def CheckArguments(self):
5789 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5790 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5791 self.op.master_capable, self.op.vm_capable,
5792 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5793 self.op.disk_state]
5794 if all_mods.count(None) == len(all_mods):
5795 raise errors.OpPrereqError("Please pass at least one modification",
5797 if all_mods.count(True) > 1:
5798 raise errors.OpPrereqError("Can't set the node into more than one"
5799 " state at the same time",
5802 # Boolean value that tells us whether we might be demoting from MC
5803 self.might_demote = (self.op.master_candidate is False or
5804 self.op.offline is True or
5805 self.op.drained is True or
5806 self.op.master_capable is False)
5808 if self.op.secondary_ip:
5809 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5810 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5811 " address" % self.op.secondary_ip,
5814 self.lock_all = self.op.auto_promote and self.might_demote
5815 self.lock_instances = self.op.secondary_ip is not None
5817 def _InstanceFilter(self, instance):
5818 """Filter for getting affected instances.
5821 return (instance.disk_template in constants.DTS_INT_MIRROR and
5822 self.op.node_name in instance.all_nodes)
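  # Note: this filter is evaluated twice on purpose: once in ExpandNames to
  # compute the set of instance locks to acquire, and again in CheckPrereq to
  # detect instances that appeared or disappeared in between, in which case
  # the operation asks the caller to retry (see the wanted/owned comparison
  # in CheckPrereq below).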
5824 def ExpandNames(self):
5825 if self.lock_all:
5826 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5827 else:
5828 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5830 # Since modifying a node can have severe effects on currently running
5831 # operations the resource lock is at least acquired in shared mode
5832 self.needed_locks[locking.LEVEL_NODE_RES] = \
5833 self.needed_locks[locking.LEVEL_NODE]
5835 # Get node resource and instance locks in shared mode; they are not used
5836 # for anything but read-only access
5837 self.share_locks[locking.LEVEL_NODE_RES] = 1
5838 self.share_locks[locking.LEVEL_INSTANCE] = 1
5840 if self.lock_instances:
5841 self.needed_locks[locking.LEVEL_INSTANCE] = \
5842 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5844 def BuildHooksEnv(self):
5847 This runs on the master node.
5849 """
5850 return {
5851 "OP_TARGET": self.op.node_name,
5852 "MASTER_CANDIDATE": str(self.op.master_candidate),
5853 "OFFLINE": str(self.op.offline),
5854 "DRAINED": str(self.op.drained),
5855 "MASTER_CAPABLE": str(self.op.master_capable),
5856 "VM_CAPABLE": str(self.op.vm_capable),
5857 }
5859 def BuildHooksNodes(self):
5860 """Build hooks nodes.
5862 """
5863 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5864 return (nl, nl)
5866 def CheckPrereq(self):
5867 """Check prerequisites.
5869 This only checks the instance list against the existing names.
5872 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5874 if self.lock_instances:
5875 affected_instances = \
5876 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5878 # Verify instance locks
5879 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5880 wanted_instances = frozenset(affected_instances.keys())
5881 if wanted_instances - owned_instances:
5882 raise errors.OpPrereqError("Instances affected by changing node %s's"
5883 " secondary IP address have changed since"
5884 " locks were acquired, wanted '%s', have"
5885 " '%s'; retry the operation" %
5886 (self.op.node_name,
5887 utils.CommaJoin(wanted_instances),
5888 utils.CommaJoin(owned_instances)),
5889 errors.ECODE_STATE)
5890 else:
5891 affected_instances = None
5893 if (self.op.master_candidate is not None or
5894 self.op.drained is not None or
5895 self.op.offline is not None):
5896 # we can't change the master's node flags
5897 if self.op.node_name == self.cfg.GetMasterNode():
5898 raise errors.OpPrereqError("The master role can be changed"
5899 " only via master-failover",
5902 if self.op.master_candidate and not node.master_capable:
5903 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5904 " it a master candidate" % node.name,
5907 if self.op.vm_capable is False:
5908 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5909 if ipri or isec:
5910 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5911 " the vm_capable flag" % node.name,
5912 errors.ECODE_STATE)
5914 if node.master_candidate and self.might_demote and not self.lock_all:
5915 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5916 # check if after removing the current node, we're missing master
5918 (mc_remaining, mc_should, _) = \
5919 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5920 if mc_remaining < mc_should:
5921 raise errors.OpPrereqError("Not enough master candidates, please"
5922 " pass auto promote option to allow"
5923 " promotion (--auto-promote or RAPI"
5924 " auto_promote=True)", errors.ECODE_STATE)
5926 self.old_flags = old_flags = (node.master_candidate,
5927 node.drained, node.offline)
5928 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5929 self.old_role = old_role = self._F2R[old_flags]
5931 # Check for ineffective changes
5932 for attr in self._FLAGS:
5933 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
5934 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5935 setattr(self.op, attr, None)
5937 # Past this point, any flag change to False means a transition
5938 # away from the respective state, as only real changes are kept
5940 # TODO: We might query the real power state if it supports OOB
5941 if _SupportsOob(self.cfg, node):
5942 if self.op.offline is False and not (node.powered or
5943 self.op.powered is True):
5944 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5945 " offline status can be reset") %
5946 self.op.node_name, errors.ECODE_STATE)
5947 elif self.op.powered is not None:
5948 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5949 " as it does not support out-of-band"
5950 " handling") % self.op.node_name,
5953 # If we're being deofflined/drained, we'll MC ourself if needed
5954 if (self.op.drained is False or self.op.offline is False or
5955 (self.op.master_capable and not node.master_capable)):
5956 if _DecideSelfPromotion(self):
5957 self.op.master_candidate = True
5958 self.LogInfo("Auto-promoting node to master candidate")
5960 # If we're no longer master capable, we'll demote ourselves from MC
5961 if self.op.master_capable is False and node.master_candidate:
5962 self.LogInfo("Demoting from master candidate")
5963 self.op.master_candidate = False
5966 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5967 if self.op.master_candidate:
5968 new_role = self._ROLE_CANDIDATE
5969 elif self.op.drained:
5970 new_role = self._ROLE_DRAINED
5971 elif self.op.offline:
5972 new_role = self._ROLE_OFFLINE
5973 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5974 # False is still in new flags, which means we're un-setting (the
5976 new_role = self._ROLE_REGULAR
5977 else: # no new flags, nothing, keep old role
5978 new_role = old_role
5980 self.new_role = new_role
5982 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5983 # Trying to transition out of offline status
5984 result = self.rpc.call_version([node.name])[node.name]
5985 if result.fail_msg:
5986 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5987 " to report its version: %s" %
5988 (node.name, result.fail_msg),
5989 errors.ECODE_ENVIRON)
5990 else:
5991 self.LogWarning("Transitioning node from offline to online state"
5992 " without using re-add. Please make sure the node"
5993 " is healthy!")
5995 # When changing the secondary ip, verify if this is a single-homed to
5996 # multi-homed transition or vice versa, and apply the relevant
5998 if self.op.secondary_ip:
5999 # Ok even without locking, because this can't be changed by any LU
6000 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6001 master_singlehomed = master.secondary_ip == master.primary_ip
6002 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6003 if self.op.force and node.name == master.name:
6004 self.LogWarning("Transitioning from single-homed to multi-homed"
6005 " cluster. All nodes will require a secondary ip.")
6006 else:
6007 raise errors.OpPrereqError("Changing the secondary ip on a"
6008 " single-homed cluster requires the"
6009 " --force option to be passed, and the"
6010 " target node to be the master",
6012 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6013 if self.op.force and node.name == master.name:
6014 self.LogWarning("Transitioning from multi-homed to single-homed"
6015 " cluster. Secondary IPs will have to be removed.")
6016 else:
6017 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6018 " same as the primary IP on a multi-homed"
6019 " cluster, unless the --force option is"
6020 " passed, and the target node is the"
6021 " master", errors.ECODE_INVAL)
6023 assert not (frozenset(affected_instances) -
6024 self.owned_locks(locking.LEVEL_INSTANCE))
6026 if node.offline:
6027 if affected_instances:
6028 msg = ("Cannot change secondary IP address: offline node has"
6029 " instances (%s) configured to use it" %
6030 utils.CommaJoin(affected_instances.keys()))
6031 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6032 else:
6033 # On online nodes, check that no instances are running, and that
6034 # the node has the new ip and we can reach it.
6035 for instance in affected_instances.values():
6036 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6037 msg="cannot change secondary ip")
6039 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6040 if master.name != node.name:
6041 # check reachability from master secondary ip to new secondary ip
6042 if not netutils.TcpPing(self.op.secondary_ip,
6043 constants.DEFAULT_NODED_PORT,
6044 source=master.secondary_ip):
6045 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6046 " based ping to node daemon port",
6047 errors.ECODE_ENVIRON)
6049 if self.op.ndparams:
6050 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6051 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6052 self.new_ndparams = new_ndparams
6054 if self.op.hv_state:
6055 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6056 self.node.hv_state_static)
6058 if self.op.disk_state:
6059 self.new_disk_state = \
6060 _MergeAndVerifyDiskState(self.op.disk_state,
6061 self.node.disk_state_static)
6063 def Exec(self, feedback_fn):
6064 """Modifies a node.
6066 """
6067 node = self.node
6068 old_role = self.old_role
6069 new_role = self.new_role
6071 result = []
6073 if self.op.ndparams:
6074 node.ndparams = self.new_ndparams
6076 if self.op.powered is not None:
6077 node.powered = self.op.powered
6079 if self.op.hv_state:
6080 node.hv_state_static = self.new_hv_state
6082 if self.op.disk_state:
6083 node.disk_state_static = self.new_disk_state
6085 for attr in ["master_capable", "vm_capable"]:
6086 val = getattr(self.op, attr)
6087 if val is not None:
6088 setattr(node, attr, val)
6089 result.append((attr, str(val)))
6091 if new_role != old_role:
6092 # Tell the node to demote itself, if no longer MC and not offline
6093 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6094 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6095 if msg:
6096 self.LogWarning("Node failed to demote itself: %s", msg)
6098 new_flags = self._R2F[new_role]
6099 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6100 if of != nf:
6101 result.append((desc, str(nf)))
6102 (node.master_candidate, node.drained, node.offline) = new_flags
6104 # we locked all nodes, we adjust the CP before updating this node
6105 if self.lock_all:
6106 _AdjustCandidatePool(self, [node.name])
6108 if self.op.secondary_ip:
6109 node.secondary_ip = self.op.secondary_ip
6110 result.append(("secondary_ip", self.op.secondary_ip))
6112 # this will trigger configuration file update, if needed
6113 self.cfg.Update(node, feedback_fn)
6115 # this will trigger job queue propagation or cleanup if the mc
6117 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6118 self.context.ReaddNode(node)
6120 return result
6123 class LUNodePowercycle(NoHooksLU):
6124 """Powercycles a node.
6129 def CheckArguments(self):
6130 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6131 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6132 raise errors.OpPrereqError("The node is the master and the force"
6133 " parameter was not set",
6136 def ExpandNames(self):
6137 """Locking for PowercycleNode.
6139 This is a last-resort option and shouldn't block on other
6140 jobs. Therefore, we grab no locks.
6143 self.needed_locks = {}
6145 def Exec(self, feedback_fn):
6149 result = self.rpc.call_node_powercycle(self.op.node_name,
6150 self.cfg.GetHypervisorType())
6151 result.Raise("Failed to schedule the reboot")
6152 return result.payload
6155 class LUClusterQuery(NoHooksLU):
6156 """Query cluster configuration.
6161 def ExpandNames(self):
6162 self.needed_locks = {}
6164 def Exec(self, feedback_fn):
6165 """Return cluster config.
6168 cluster = self.cfg.GetClusterInfo()
6169 os_hvp = {}
6171 # Filter just for enabled hypervisors
6172 for os_name, hv_dict in cluster.os_hvp.items():
6173 os_hvp[os_name] = {}
6174 for hv_name, hv_params in hv_dict.items():
6175 if hv_name in cluster.enabled_hypervisors:
6176 os_hvp[os_name][hv_name] = hv_params
6178 # Convert ip_family to ip_version
6179 primary_ip_version = constants.IP4_VERSION
6180 if cluster.primary_ip_family == netutils.IP6Address.family:
6181 primary_ip_version = constants.IP6_VERSION
6183 result = {
6184 "software_version": constants.RELEASE_VERSION,
6185 "protocol_version": constants.PROTOCOL_VERSION,
6186 "config_version": constants.CONFIG_VERSION,
6187 "os_api_version": max(constants.OS_API_VERSIONS),
6188 "export_version": constants.EXPORT_VERSION,
6189 "architecture": runtime.GetArchInfo(),
6190 "name": cluster.cluster_name,
6191 "master": cluster.master_node,
6192 "default_hypervisor": cluster.primary_hypervisor,
6193 "enabled_hypervisors": cluster.enabled_hypervisors,
6194 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6195 for hypervisor_name in cluster.enabled_hypervisors]),
6197 "beparams": cluster.beparams,
6198 "osparams": cluster.osparams,
6199 "ipolicy": cluster.ipolicy,
6200 "nicparams": cluster.nicparams,
6201 "ndparams": cluster.ndparams,
6202 "diskparams": cluster.diskparams,
6203 "candidate_pool_size": cluster.candidate_pool_size,
6204 "master_netdev": cluster.master_netdev,
6205 "master_netmask": cluster.master_netmask,
6206 "use_external_mip_script": cluster.use_external_mip_script,
6207 "volume_group_name": cluster.volume_group_name,
6208 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6209 "file_storage_dir": cluster.file_storage_dir,
6210 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6211 "maintain_node_health": cluster.maintain_node_health,
6212 "ctime": cluster.ctime,
6213 "mtime": cluster.mtime,
6214 "uuid": cluster.uuid,
6215 "tags": list(cluster.GetTags()),
6216 "uid_pool": cluster.uid_pool,
6217 "default_iallocator": cluster.default_iallocator,
6218 "reserved_lvs": cluster.reserved_lvs,
6219 "primary_ip_version": primary_ip_version,
6220 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6221 "hidden_os": cluster.hidden_os,
6222 "blacklisted_os": cluster.blacklisted_os,
6223 }
6225 return result
6228 class LUClusterConfigQuery(NoHooksLU):
6229 """Return configuration values.
6234 def CheckArguments(self):
6235 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6237 def ExpandNames(self):
6238 self.cq.ExpandNames(self)
6240 def DeclareLocks(self, level):
6241 self.cq.DeclareLocks(self, level)
6243 def Exec(self, feedback_fn):
6244 result = self.cq.OldStyleQuery(self)
6246 assert len(result) == 1
6251 class _ClusterQuery(_QueryBase):
6252 FIELDS = query.CLUSTER_FIELDS
6254 #: Do not sort (there is only one item)
6257 def ExpandNames(self, lu):
6258 lu.needed_locks = {}
6260 # The following variables interact with _QueryBase._GetNames
6261 self.wanted = locking.ALL_SET
6262 self.do_locking = self.use_locking
6264 if self.do_locking:
6265 raise errors.OpPrereqError("Can not use locking for cluster queries",
6266 errors.ECODE_INVAL)
6268 def DeclareLocks(self, lu, level):
6271 def _GetQueryData(self, lu):
6272 """Computes the list of nodes and their attributes.
6275 # Locking is not used
6276 assert not (compat.any(lu.glm.is_owned(level)
6277 for level in locking.LEVELS
6278 if level != locking.LEVEL_CLUSTER) or
6279 self.do_locking or self.use_locking)
6281 if query.CQ_CONFIG in self.requested_data:
6282 cluster = lu.cfg.GetClusterInfo()
6283 else:
6284 cluster = NotImplemented
6286 if query.CQ_QUEUE_DRAINED in self.requested_data:
6287 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6288 else:
6289 drain_flag = NotImplemented
6291 if query.CQ_WATCHER_PAUSE in self.requested_data:
6292 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6293 else:
6294 watcher_pause = NotImplemented
6296 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
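    # Data that was not requested is passed to ClusterQueryData as
    # NotImplemented rather than being fetched, so only the pieces named in
    # self.requested_data are actually read from the configuration, the job
    # queue drain file or the watcher pause file.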
6299 class LUInstanceActivateDisks(NoHooksLU):
6300 """Bring up an instance's disks.
6305 def ExpandNames(self):
6306 self._ExpandAndLockInstance()
6307 self.needed_locks[locking.LEVEL_NODE] = []
6308 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6310 def DeclareLocks(self, level):
6311 if level == locking.LEVEL_NODE:
6312 self._LockInstancesNodes()
6314 def CheckPrereq(self):
6315 """Check prerequisites.
6317 This checks that the instance is in the cluster.
6320 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6321 assert self.instance is not None, \
6322 "Cannot retrieve locked instance %s" % self.op.instance_name
6323 _CheckNodeOnline(self, self.instance.primary_node)
6325 def Exec(self, feedback_fn):
6326 """Activate the disks.
6329 disks_ok, disks_info = \
6330 _AssembleInstanceDisks(self, self.instance,
6331 ignore_size=self.op.ignore_size)
6332 if not disks_ok:
6333 raise errors.OpExecError("Cannot activate block devices")
6335 if self.op.wait_for_sync:
6336 if not _WaitForSync(self, self.instance):
6337 raise errors.OpExecError("Some disks of the instance are degraded!")
6339 return disks_info
6342 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6343 ignore_size=False):
6344 """Prepare the block devices for an instance.
6346 This sets up the block devices on all nodes.
6348 @type lu: L{LogicalUnit}
6349 @param lu: the logical unit on whose behalf we execute
6350 @type instance: L{objects.Instance}
6351 @param instance: the instance for whose disks we assemble
6352 @type disks: list of L{objects.Disk} or None
6353 @param disks: which disks to assemble (or all, if None)
6354 @type ignore_secondaries: boolean
6355 @param ignore_secondaries: if true, errors on secondary nodes
6356 won't result in an error return from the function
6357 @type ignore_size: boolean
6358 @param ignore_size: if true, the current known size of the disk
6359 will not be used during the disk activation, useful for cases
6360 when the size is wrong
6361 @return: False if the operation failed, otherwise a list of
6362 (host, instance_visible_name, node_visible_name)
6363 with the mapping from node devices to instance devices
6365 """
6366 device_info = []
6367 disks_ok = True
6368 iname = instance.name
6369 disks = _ExpandCheckDisks(instance, disks)
6371 # With the two-pass mechanism we try to reduce the window of
6372 # opportunity for the race condition of switching DRBD to primary
6373 # before handshaking occurred, but we do not eliminate it
6375 # The proper fix would be to wait (with some limits) until the
6376 # connection has been made and drbd transitions from WFConnection
6377 # into any other network-connected state (Connected, SyncTarget,
6380 # 1st pass, assemble on all nodes in secondary mode
6381 for idx, inst_disk in enumerate(disks):
6382 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6383 if ignore_size:
6384 node_disk = node_disk.Copy()
6385 node_disk.UnsetSize()
6386 lu.cfg.SetDiskID(node_disk, node)
6387 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6388 False, idx)
6389 msg = result.fail_msg
6390 if msg:
6391 is_offline_secondary = (node in instance.secondary_nodes and
6392 result.offline)
6393 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6394 " (is_primary=False, pass=1): %s",
6395 inst_disk.iv_name, node, msg)
6396 if not (ignore_secondaries or is_offline_secondary):
6397 disks_ok = False
6399 # FIXME: race condition on drbd migration to primary
6401 # 2nd pass, do only the primary node
6402 for idx, inst_disk in enumerate(disks):
6403 dev_path = None
6405 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6406 if node != instance.primary_node:
6407 continue
6408 if ignore_size:
6409 node_disk = node_disk.Copy()
6410 node_disk.UnsetSize()
6411 lu.cfg.SetDiskID(node_disk, node)
6412 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6413 True, idx)
6414 msg = result.fail_msg
6415 if msg:
6416 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6417 " (is_primary=True, pass=2): %s",
6418 inst_disk.iv_name, node, msg)
6419 disks_ok = False
6420 else:
6421 dev_path = result.payload
6423 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6425 # leave the disks configured for the primary node
6426 # this is a workaround that would be fixed better by
6427 # improving the logical/physical id handling
6428 for disk in disks:
6429 lu.cfg.SetDiskID(disk, instance.primary_node)
6431 return disks_ok, device_info
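# Summary of the contract above: the first pass assembles every disk on every
# node in secondary mode, the second pass re-assembles only on the primary
# node, and device_info only describes the primary-node devices.  Callers
# such as _StartInstanceDisks below only look at the disks_ok flag.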
6434 def _StartInstanceDisks(lu, instance, force):
6435 """Start the disks of an instance.
6438 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6439 ignore_secondaries=force)
6440 if not disks_ok:
6441 _ShutdownInstanceDisks(lu, instance)
6442 if force is not None and not force:
6443 lu.proc.LogWarning("", hint="If the message above refers to a"
6445 " you can retry the operation using '--force'.")
6446 raise errors.OpExecError("Disk consistency error")
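# The force argument doubles as ignore_secondaries here; passing None (as
# LUInstanceReinstall and LUInstanceRename do further down) means "not
# forceable", so the '--force' hint above is only printed when an explicit
# boolean False was given.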
6449 class LUInstanceDeactivateDisks(NoHooksLU):
6450 """Shutdown an instance's disks.
6455 def ExpandNames(self):
6456 self._ExpandAndLockInstance()
6457 self.needed_locks[locking.LEVEL_NODE] = []
6458 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6460 def DeclareLocks(self, level):
6461 if level == locking.LEVEL_NODE:
6462 self._LockInstancesNodes()
6464 def CheckPrereq(self):
6465 """Check prerequisites.
6467 This checks that the instance is in the cluster.
6470 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6471 assert self.instance is not None, \
6472 "Cannot retrieve locked instance %s" % self.op.instance_name
6474 def Exec(self, feedback_fn):
6475 """Deactivate the disks
6478 instance = self.instance
6479 if self.op.force:
6480 _ShutdownInstanceDisks(self, instance)
6481 else:
6482 _SafeShutdownInstanceDisks(self, instance)
6485 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6486 """Shutdown block devices of an instance.
6488 This function checks if an instance is running, before calling
6489 _ShutdownInstanceDisks.
6492 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6493 _ShutdownInstanceDisks(lu, instance, disks=disks)
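# _SafeShutdownInstanceDisks is the variant used when the caller has not
# already verified the instance state: deactivating the disks under a running
# instance would be destructive, hence the _CheckInstanceState guard above.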
6496 def _ExpandCheckDisks(instance, disks):
6497 """Return the instance disks selected by the disks list
6499 @type disks: list of L{objects.Disk} or None
6500 @param disks: selected disks
6501 @rtype: list of L{objects.Disk}
6502 @return: selected instance disks to act on
6504 """
6505 if disks is None:
6506 return instance.disks
6507 else:
6508 if not set(disks).issubset(instance.disks):
6509 raise errors.ProgrammerError("Can only act on disks belonging to the"
6510 " target instance")
6511 return disks
6514 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6515 """Shutdown block devices of an instance.
6517 This does the shutdown on all nodes of the instance.
6519 If the ignore_primary is false, errors on the primary node are
6520 ignored.
6522 """
6523 all_result = True
6524 disks = _ExpandCheckDisks(instance, disks)
6526 for disk in disks:
6527 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6528 lu.cfg.SetDiskID(top_disk, node)
6529 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6530 msg = result.fail_msg
6531 if msg:
6532 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6533 disk.iv_name, node, msg)
6534 if ((node == instance.primary_node and not ignore_primary) or
6535 (node != instance.primary_node and not result.offline)):
6536 all_result = False
6538 return all_result
6540 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6541 """Checks if a node has enough free memory.
6543 This function checks if a given node has the needed amount of free
6544 memory. In case the node has less memory or we cannot get the
6545 information from the node, this function raises an OpPrereqError
6546 exception.
6548 @type lu: C{LogicalUnit}
6549 @param lu: a logical unit from which we get configuration data
6551 @param node: the node to check
6552 @type reason: C{str}
6553 @param reason: string to use in the error message
6554 @type requested: C{int}
6555 @param requested: the amount of memory in MiB to check for
6556 @type hypervisor_name: C{str}
6557 @param hypervisor_name: the hypervisor to ask for memory stats
6559 @return: node current free memory
6560 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6561 we cannot check the node
6564 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6565 nodeinfo[node].Raise("Can't get data from node %s" % node,
6566 prereq=True, ecode=errors.ECODE_ENVIRON)
6567 (_, _, (hv_info, )) = nodeinfo[node].payload
6569 free_mem = hv_info.get("memory_free", None)
6570 if not isinstance(free_mem, int):
6571 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6572 " was '%s'" % (node, free_mem),
6573 errors.ECODE_ENVIRON)
6574 if requested > free_mem:
6575 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6576 " needed %s MiB, available %s MiB" %
6577 (node, reason, requested, free_mem),
6578 errors.ECODE_NORES)
6580 return free_mem
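# Typical call, as used by LUInstanceStartup.CheckPrereq further down in this
# module:
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)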
6582 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6583 """Checks if nodes have enough free disk space in the all VGs.
6585 This function checks if all given nodes have the needed amount of
6586 free disk. In case any node has less disk or we cannot get the
6587 information from the node, this function raises an OpPrereqError
6588 exception.
6590 @type lu: C{LogicalUnit}
6591 @param lu: a logical unit from which we get configuration data
6592 @type nodenames: C{list}
6593 @param nodenames: the list of node names to check
6594 @type req_sizes: C{dict}
6595 @param req_sizes: the hash of vg and corresponding amount of disk in
6597 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6598 or we cannot check the node
6601 for vg, req_size in req_sizes.items():
6602 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
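# req_sizes maps a volume group name to the space required in MiB, e.g.
# {"xenvg": 10240} to require 10 GiB in a (hypothetical) group called xenvg;
# each entry is checked independently via _CheckNodesFreeDiskOnVG above.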
6605 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6606 """Checks if nodes have enough free disk space in the specified VG.
6608 This function checks if all given nodes have the needed amount of
6609 free disk. In case any node has less disk or we cannot get the
6610 information from the node, this function raises an OpPrereqError
6611 exception.
6613 @type lu: C{LogicalUnit}
6614 @param lu: a logical unit from which we get configuration data
6615 @type nodenames: C{list}
6616 @param nodenames: the list of node names to check
6618 @param vg: the volume group to check
6619 @type requested: C{int}
6620 @param requested: the amount of disk in MiB to check for
6621 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6622 or we cannot check the node
6625 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6626 for node in nodenames:
6627 info = nodeinfo[node]
6628 info.Raise("Cannot get current information from node %s" % node,
6629 prereq=True, ecode=errors.ECODE_ENVIRON)
6630 (_, (vg_info, ), _) = info.payload
6631 vg_free = vg_info.get("vg_free", None)
6632 if not isinstance(vg_free, int):
6633 raise errors.OpPrereqError("Can't compute free disk space on node"
6634 " %s for vg %s, result was '%s'" %
6635 (node, vg, vg_free), errors.ECODE_ENVIRON)
6636 if requested > vg_free:
6637 raise errors.OpPrereqError("Not enough disk space on target node %s"
6638 " vg %s: required %d MiB, available %d MiB" %
6639 (node, vg, requested, vg_free),
6643 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6644 """Checks if nodes have enough physical CPUs
6646 This function checks if all given nodes have the needed number of
6647 physical CPUs. In case any node has less CPUs or we cannot get the
6648 information from the node, this function raises an OpPrereqError
6651 @type lu: C{LogicalUnit}
6652 @param lu: a logical unit from which we get configuration data
6653 @type nodenames: C{list}
6654 @param nodenames: the list of node names to check
6655 @type requested: C{int}
6656 @param requested: the minimum acceptable number of physical CPUs
6657 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6658 or we cannot check the node
6661 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6662 for node in nodenames:
6663 info = nodeinfo[node]
6664 info.Raise("Cannot get current information from node %s" % node,
6665 prereq=True, ecode=errors.ECODE_ENVIRON)
6666 (_, _, (hv_info, )) = info.payload
6667 num_cpus = hv_info.get("cpu_total", None)
6668 if not isinstance(num_cpus, int):
6669 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6670 " on node %s, result was '%s'" %
6671 (node, num_cpus), errors.ECODE_ENVIRON)
6672 if requested > num_cpus:
6673 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6674 "required" % (node, num_cpus, requested),
6678 class LUInstanceStartup(LogicalUnit):
6679 """Starts an instance.
6682 HPATH = "instance-start"
6683 HTYPE = constants.HTYPE_INSTANCE
6686 def CheckArguments(self):
6688 if self.op.beparams:
6689 # fill the beparams dict
6690 objects.UpgradeBeParams(self.op.beparams)
6691 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6693 def ExpandNames(self):
6694 self._ExpandAndLockInstance()
6695 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6697 def DeclareLocks(self, level):
6698 if level == locking.LEVEL_NODE_RES:
6699 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6701 def BuildHooksEnv(self):
6704 This runs on master, primary and secondary nodes of the instance.
6707 env = {
6708 "FORCE": self.op.force,
6709 }
6711 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6713 return env
6715 def BuildHooksNodes(self):
6716 """Build hooks nodes.
6719 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6720 return (nl, nl)
6722 def CheckPrereq(self):
6723 """Check prerequisites.
6725 This checks that the instance is in the cluster.
6728 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6729 assert self.instance is not None, \
6730 "Cannot retrieve locked instance %s" % self.op.instance_name
6733 if self.op.hvparams:
6734 # check hypervisor parameter syntax (locally)
6735 cluster = self.cfg.GetClusterInfo()
6736 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6737 filled_hvp = cluster.FillHV(instance)
6738 filled_hvp.update(self.op.hvparams)
6739 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6740 hv_type.CheckParameterSyntax(filled_hvp)
6741 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6743 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6745 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6747 if self.primary_offline and self.op.ignore_offline_nodes:
6748 self.proc.LogWarning("Ignoring offline primary node")
6750 if self.op.hvparams or self.op.beparams:
6751 self.proc.LogWarning("Overridden parameters are ignored")
6752 else:
6753 _CheckNodeOnline(self, instance.primary_node)
6755 bep = self.cfg.GetClusterInfo().FillBE(instance)
6756 bep.update(self.op.beparams)
6758 # check bridges existence
6759 _CheckInstanceBridgesExist(self, instance)
6761 remote_info = self.rpc.call_instance_info(instance.primary_node,
6762 instance.name,
6763 instance.hypervisor)
6764 remote_info.Raise("Error checking node %s" % instance.primary_node,
6765 prereq=True, ecode=errors.ECODE_ENVIRON)
6766 if not remote_info.payload: # not running already
6767 _CheckNodeFreeMemory(self, instance.primary_node,
6768 "starting instance %s" % instance.name,
6769 bep[constants.BE_MINMEM], instance.hypervisor)
6771 def Exec(self, feedback_fn):
6772 """Start the instance.
6775 instance = self.instance
6776 force = self.op.force
6778 if not self.op.no_remember:
6779 self.cfg.MarkInstanceUp(instance.name)
6781 if self.primary_offline:
6782 assert self.op.ignore_offline_nodes
6783 self.proc.LogInfo("Primary node offline, marked instance as started")
6784 else:
6785 node_current = instance.primary_node
6787 _StartInstanceDisks(self, instance, force)
6789 result = \
6790 self.rpc.call_instance_start(node_current,
6791 (instance, self.op.hvparams,
6792 self.op.beparams),
6793 self.op.startup_paused)
6794 msg = result.fail_msg
6795 if msg:
6796 _ShutdownInstanceDisks(self, instance)
6797 raise errors.OpExecError("Could not start instance: %s" % msg)
6800 class LUInstanceReboot(LogicalUnit):
6801 """Reboot an instance.
6804 HPATH = "instance-reboot"
6805 HTYPE = constants.HTYPE_INSTANCE
6808 def ExpandNames(self):
6809 self._ExpandAndLockInstance()
6811 def BuildHooksEnv(self):
6814 This runs on master, primary and secondary nodes of the instance.
6817 env = {
6818 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6819 "REBOOT_TYPE": self.op.reboot_type,
6820 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6821 }
6823 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6825 return env
6827 def BuildHooksNodes(self):
6828 """Build hooks nodes.
6831 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6832 return (nl, nl)
6834 def CheckPrereq(self):
6835 """Check prerequisites.
6837 This checks that the instance is in the cluster.
6840 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6841 assert self.instance is not None, \
6842 "Cannot retrieve locked instance %s" % self.op.instance_name
6843 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6844 _CheckNodeOnline(self, instance.primary_node)
6846 # check bridges existence
6847 _CheckInstanceBridgesExist(self, instance)
6849 def Exec(self, feedback_fn):
6850 """Reboot the instance.
6853 instance = self.instance
6854 ignore_secondaries = self.op.ignore_secondaries
6855 reboot_type = self.op.reboot_type
6857 remote_info = self.rpc.call_instance_info(instance.primary_node,
6858 instance.name,
6859 instance.hypervisor)
6860 remote_info.Raise("Error checking node %s" % instance.primary_node)
6861 instance_running = bool(remote_info.payload)
6863 node_current = instance.primary_node
6865 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6866 constants.INSTANCE_REBOOT_HARD]:
6867 for disk in instance.disks:
6868 self.cfg.SetDiskID(disk, node_current)
6869 result = self.rpc.call_instance_reboot(node_current, instance,
6870 reboot_type,
6871 self.op.shutdown_timeout)
6872 result.Raise("Could not reboot instance")
6873 else:
6874 if instance_running:
6875 result = self.rpc.call_instance_shutdown(node_current, instance,
6876 self.op.shutdown_timeout)
6877 result.Raise("Could not shutdown instance for full reboot")
6878 _ShutdownInstanceDisks(self, instance)
6879 else:
6880 self.LogInfo("Instance %s was already stopped, starting now",
6881 instance.name)
6882 _StartInstanceDisks(self, instance, ignore_secondaries)
6883 result = self.rpc.call_instance_start(node_current,
6884 (instance, None, None), False)
6885 msg = result.fail_msg
6886 if msg:
6887 _ShutdownInstanceDisks(self, instance)
6888 raise errors.OpExecError("Could not start instance for"
6889 " full reboot: %s" % msg)
6891 self.cfg.MarkInstanceUp(instance.name)
6894 class LUInstanceShutdown(LogicalUnit):
6895 """Shutdown an instance.
6898 HPATH = "instance-stop"
6899 HTYPE = constants.HTYPE_INSTANCE
6902 def ExpandNames(self):
6903 self._ExpandAndLockInstance()
6905 def BuildHooksEnv(self):
6908 This runs on master, primary and secondary nodes of the instance.
6911 env = _BuildInstanceHookEnvByObject(self, self.instance)
6912 env["TIMEOUT"] = self.op.timeout
6913 return env
6915 def BuildHooksNodes(self):
6916 """Build hooks nodes.
6919 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6920 return (nl, nl)
6922 def CheckPrereq(self):
6923 """Check prerequisites.
6925 This checks that the instance is in the cluster.
6928 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6929 assert self.instance is not None, \
6930 "Cannot retrieve locked instance %s" % self.op.instance_name
6932 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6934 self.primary_offline = \
6935 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6937 if self.primary_offline and self.op.ignore_offline_nodes:
6938 self.proc.LogWarning("Ignoring offline primary node")
6940 _CheckNodeOnline(self, self.instance.primary_node)
6942 def Exec(self, feedback_fn):
6943 """Shutdown the instance.
6946 instance = self.instance
6947 node_current = instance.primary_node
6948 timeout = self.op.timeout
6950 if not self.op.no_remember:
6951 self.cfg.MarkInstanceDown(instance.name)
6953 if self.primary_offline:
6954 assert self.op.ignore_offline_nodes
6955 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6956 else:
6957 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6958 msg = result.fail_msg
6959 if msg:
6960 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6962 _ShutdownInstanceDisks(self, instance)
6965 class LUInstanceReinstall(LogicalUnit):
6966 """Reinstall an instance.
6969 HPATH = "instance-reinstall"
6970 HTYPE = constants.HTYPE_INSTANCE
6973 def ExpandNames(self):
6974 self._ExpandAndLockInstance()
6976 def BuildHooksEnv(self):
6979 This runs on master, primary and secondary nodes of the instance.
6982 return _BuildInstanceHookEnvByObject(self, self.instance)
6984 def BuildHooksNodes(self):
6985 """Build hooks nodes.
6988 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6989 return (nl, nl)
6991 def CheckPrereq(self):
6992 """Check prerequisites.
6994 This checks that the instance is in the cluster and is not running.
6997 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6998 assert instance is not None, \
6999 "Cannot retrieve locked instance %s" % self.op.instance_name
7000 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7001 " offline, cannot reinstall")
7003 if instance.disk_template == constants.DT_DISKLESS:
7004 raise errors.OpPrereqError("Instance '%s' has no disks" %
7005 self.op.instance_name,
7007 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7009 if self.op.os_type is not None:
7011 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7012 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7013 instance_os = self.op.os_type
7014 else:
7015 instance_os = instance.os
7017 nodelist = list(instance.all_nodes)
7019 if self.op.osparams:
7020 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7021 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7022 self.os_inst = i_osdict # the new dict (without defaults)
7023 else:
7024 self.os_inst = {}
7026 self.instance = instance
7028 def Exec(self, feedback_fn):
7029 """Reinstall the instance.
7032 inst = self.instance
7034 if self.op.os_type is not None:
7035 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7036 inst.os = self.op.os_type
7037 # Write to configuration
7038 self.cfg.Update(inst, feedback_fn)
7040 _StartInstanceDisks(self, inst, None)
7042 feedback_fn("Running the instance OS create scripts...")
7043 # FIXME: pass debug option from opcode to backend
7044 result = self.rpc.call_instance_os_add(inst.primary_node,
7045 (inst, self.os_inst), True,
7046 self.op.debug_level)
7047 result.Raise("Could not install OS for instance %s on node %s" %
7048 (inst.name, inst.primary_node))
7050 _ShutdownInstanceDisks(self, inst)
7053 class LUInstanceRecreateDisks(LogicalUnit):
7054 """Recreate an instance's missing disks.
7057 HPATH = "instance-recreate-disks"
7058 HTYPE = constants.HTYPE_INSTANCE
7061 _MODIFYABLE = frozenset([
7062 constants.IDISK_SIZE,
7063 constants.IDISK_MODE,
7066 # New or changed disk parameters may have different semantics
7067 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7068 constants.IDISK_ADOPT,
7070 # TODO: Implement support changing VG while recreating
7072 constants.IDISK_METAVG,
7075 def _RunAllocator(self):
7076 """Run the allocator based on input opcode.
7079 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7082 # The allocator should actually run in "relocate" mode, but current
7083 # allocators don't support relocating all the nodes of an instance at
7084 # the same time. As a workaround we use "allocate" mode, but this is
7085 # suboptimal for two reasons:
7086 # - The instance name passed to the allocator is present in the list of
7087 # existing instances, so there could be a conflict within the
7088 # internal structures of the allocator. This doesn't happen with the
7089 # current allocators, but it's a liability.
7090 # - The allocator counts the resources used by the instance twice: once
7091 # because the instance exists already, and once because it tries to
7092 # allocate a new instance.
7093 # The allocator could choose some of the nodes on which the instance is
7094 # running, but that's not a problem. If the instance nodes are broken,
7095 # they should already be marked as drained or offline, and hence
7096 # skipped by the allocator. If instance disks have been lost for other
7097 # reasons, then recreating the disks on the same nodes should be fine.
7098 disk_template = self.instance.disk_template
7099 spindle_use = be_full[constants.BE_SPINDLE_USE]
7100 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7101 disk_template=disk_template,
7102 tags=list(self.instance.GetTags()),
7103 os=self.instance.os,
7105 vcpus=be_full[constants.BE_VCPUS],
7106 memory=be_full[constants.BE_MAXMEM],
7107 spindle_use=spindle_use,
7108 disks=[{constants.IDISK_SIZE: d.size,
7109 constants.IDISK_MODE: d.mode}
7110 for d in self.instance.disks],
7111 hypervisor=self.instance.hypervisor)
7112 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7114 ial.Run(self.op.iallocator)
7116 assert req.RequiredNodes() == len(self.instance.all_nodes)
7118 if not ial.success:
7119 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7120 " %s" % (self.op.iallocator, ial.info),
7121 errors.ECODE_NORES)
7123 self.op.nodes = ial.result
7124 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7125 self.op.instance_name, self.op.iallocator,
7126 utils.CommaJoin(ial.result))
7128 def CheckArguments(self):
7129 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7130 # Normalize and convert deprecated list of disk indices
7131 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7133 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7134 if duplicates:
7135 raise errors.OpPrereqError("Some disks have been specified more than"
7136 " once: %s" % utils.CommaJoin(duplicates),
7137 errors.ECODE_INVAL)
7139 if self.op.iallocator and self.op.nodes:
7140 raise errors.OpPrereqError("Give either the iallocator or the new"
7141 " nodes, not both", errors.ECODE_INVAL)
7143 for (idx, params) in self.op.disks:
7144 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7145 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7146 if unsupported:
7147 raise errors.OpPrereqError("Parameters for disk %s try to change"
7148 " unmodifiable parameter(s): %s" %
7149 (idx, utils.CommaJoin(unsupported)),
7150 errors.ECODE_INVAL)
7152 def ExpandNames(self):
7153 self._ExpandAndLockInstance()
7154 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7155 if self.op.nodes:
7156 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7157 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7158 else:
7159 self.needed_locks[locking.LEVEL_NODE] = []
7160 if self.op.iallocator:
7161 # iallocator will select a new node in the same group
7162 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7163 self.needed_locks[locking.LEVEL_NODE_RES] = []
7165 def DeclareLocks(self, level):
7166 if level == locking.LEVEL_NODEGROUP:
7167 assert self.op.iallocator is not None
7168 assert not self.op.nodes
7169 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7170 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7171 # Lock the primary group used by the instance optimistically; this
7172 # requires going via the node before it's locked, requiring
7173 # verification later on
7174 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7175 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7177 elif level == locking.LEVEL_NODE:
7178 # If an allocator is used, then we lock all the nodes in the current
7179 # instance group, as we don't know yet which ones will be selected;
7180 # if we replace the nodes without using an allocator, locks are
7181 # already declared in ExpandNames; otherwise, we need to lock all the
7182 # instance nodes for disk re-creation
7183 if self.op.iallocator:
7184 assert not self.op.nodes
7185 assert not self.needed_locks[locking.LEVEL_NODE]
7186 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7188 # Lock member nodes of the group of the primary node
7189 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7190 self.needed_locks[locking.LEVEL_NODE].extend(
7191 self.cfg.GetNodeGroup(group_uuid).members)
7192 elif not self.op.nodes:
7193 self._LockInstancesNodes(primary_only=False)
7194 elif level == locking.LEVEL_NODE_RES:
7196 self.needed_locks[locking.LEVEL_NODE_RES] = \
7197 self.needed_locks[locking.LEVEL_NODE][:]
7199 def BuildHooksEnv(self):
7202 This runs on master, primary and secondary nodes of the instance.
7205 return _BuildInstanceHookEnvByObject(self, self.instance)
7207 def BuildHooksNodes(self):
7208 """Build hooks nodes.
7211 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7212 return (nl, nl)
7214 def CheckPrereq(self):
7215 """Check prerequisites.
7217 This checks that the instance is in the cluster and is not running.
7220 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7221 assert instance is not None, \
7222 "Cannot retrieve locked instance %s" % self.op.instance_name
7223 if self.op.nodes:
7224 if len(self.op.nodes) != len(instance.all_nodes):
7225 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7226 " %d replacement nodes were specified" %
7227 (instance.name, len(instance.all_nodes),
7228 len(self.op.nodes)),
7229 errors.ECODE_INVAL)
7230 assert instance.disk_template != constants.DT_DRBD8 or \
7231 len(self.op.nodes) == 2
7232 assert instance.disk_template != constants.DT_PLAIN or \
7233 len(self.op.nodes) == 1
7234 primary_node = self.op.nodes[0]
7235 else:
7236 primary_node = instance.primary_node
7237 if not self.op.iallocator:
7238 _CheckNodeOnline(self, primary_node)
7240 if instance.disk_template == constants.DT_DISKLESS:
7241 raise errors.OpPrereqError("Instance '%s' has no disks" %
7242 self.op.instance_name, errors.ECODE_INVAL)
7244 # Verify if node group locks are still correct
7245 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7247 # Node group locks are acquired only for the primary node (and only
7248 # when the allocator is used)
7249 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7252 # if we replace nodes *and* the old primary is offline, we don't
7253 # check the instance state
7254 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7255 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7256 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7257 msg="cannot recreate disks")
7259 if self.op.disks:
7260 self.disks = dict(self.op.disks)
7261 else:
7262 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7264 maxidx = max(self.disks.keys())
7265 if maxidx >= len(instance.disks):
7266 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7269 if ((self.op.nodes or self.op.iallocator) and
7270 sorted(self.disks.keys()) != range(len(instance.disks))):
7271 raise errors.OpPrereqError("Can't recreate disks partially and"
7272 " change the nodes at the same time",
7275 self.instance = instance
7277 if self.op.iallocator:
7278 self._RunAllocator()
7280 # Release unneeded node and node resource locks
7281 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7282 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7284 def Exec(self, feedback_fn):
7285 """Recreate the disks.
7288 instance = self.instance
7290 assert (self.owned_locks(locking.LEVEL_NODE) ==
7291 self.owned_locks(locking.LEVEL_NODE_RES))
7293 to_skip = []
7294 mods = [] # keeps track of needed changes
7296 for idx, disk in enumerate(instance.disks):
7297 try:
7298 changes = self.disks[idx]
7299 except KeyError:
7300 # Disk should not be recreated
7301 to_skip.append(idx)
7302 continue
7304 # update secondaries for disks, if needed
7305 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7306 # need to update the nodes and minors
7307 assert len(self.op.nodes) == 2
7308 assert len(disk.logical_id) == 6 # otherwise disk internals
7310 (_, _, old_port, _, _, old_secret) = disk.logical_id
7311 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7312 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7313 new_minors[0], new_minors[1], old_secret)
7314 assert len(disk.logical_id) == len(new_id)
7315 else:
7316 new_id = None
7318 mods.append((idx, new_id, changes))
7320 # now that we have passed all asserts above, we can apply the mods
7321 # in a single run (to avoid partial changes)
7322 for idx, new_id, changes in mods:
7323 disk = instance.disks[idx]
7324 if new_id is not None:
7325 assert disk.dev_type == constants.LD_DRBD8
7326 disk.logical_id = new_id
7327 if changes:
7328 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7329 mode=changes.get(constants.IDISK_MODE, None))
7331 # change primary node, if needed
7332 if self.op.nodes:
7333 instance.primary_node = self.op.nodes[0]
7334 self.LogWarning("Changing the instance's nodes, you will have to"
7335 " remove any disks left on the older nodes manually")
7338 self.cfg.Update(instance, feedback_fn)
7340 _CreateDisks(self, instance, to_skip=to_skip)
7343 class LUInstanceRename(LogicalUnit):
7344 """Rename an instance.
7347 HPATH = "instance-rename"
7348 HTYPE = constants.HTYPE_INSTANCE
7350 def CheckArguments(self):
7354 if self.op.ip_check and not self.op.name_check:
7355 # TODO: make the ip check more flexible and not depend on the name check
7356 raise errors.OpPrereqError("IP address check requires a name check",
7359 def BuildHooksEnv(self):
7362 This runs on master, primary and secondary nodes of the instance.
7365 env = _BuildInstanceHookEnvByObject(self, self.instance)
7366 env["INSTANCE_NEW_NAME"] = self.op.new_name
7367 return env
7369 def BuildHooksNodes(self):
7370 """Build hooks nodes.
7373 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7374 return (nl, nl)
7376 def CheckPrereq(self):
7377 """Check prerequisites.
7379 This checks that the instance is in the cluster and is not running.
7382 self.op.instance_name = _ExpandInstanceName(self.cfg,
7383 self.op.instance_name)
7384 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7385 assert instance is not None
7386 _CheckNodeOnline(self, instance.primary_node)
7387 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7388 msg="cannot rename")
7389 self.instance = instance
7391 new_name = self.op.new_name
7392 if self.op.name_check:
7393 hostname = netutils.GetHostname(name=new_name)
7394 if hostname.name != new_name:
7395 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7397 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7398 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7399 " same as given hostname '%s'") %
7400 (hostname.name, self.op.new_name),
7402 new_name = self.op.new_name = hostname.name
7403 if (self.op.ip_check and
7404 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7405 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7406 (hostname.ip, new_name),
7407 errors.ECODE_NOTUNIQUE)
7409 instance_list = self.cfg.GetInstanceList()
7410 if new_name in instance_list and new_name != instance.name:
7411 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7412 new_name, errors.ECODE_EXISTS)
7414 def Exec(self, feedback_fn):
7415 """Rename the instance.
7418 inst = self.instance
7419 old_name = inst.name
7421 rename_file_storage = False
7422 if (inst.disk_template in constants.DTS_FILEBASED and
7423 self.op.new_name != inst.name):
7424 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7425 rename_file_storage = True
7427 self.cfg.RenameInstance(inst.name, self.op.new_name)
7428 # Change the instance lock. This is definitely safe while we hold the BGL.
7429 # Otherwise the new lock would have to be added in acquired mode.
7431 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7432 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7434 # re-read the instance from the configuration after rename
7435 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7437 if rename_file_storage:
7438 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7439 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7440 old_file_storage_dir,
7441 new_file_storage_dir)
7442 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7443 " (but the instance has been renamed in Ganeti)" %
7444 (inst.primary_node, old_file_storage_dir,
7445 new_file_storage_dir))
7447 _StartInstanceDisks(self, inst, None)
7449 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7450 old_name, self.op.debug_level)
7451 msg = result.fail_msg
7453 msg = ("Could not run OS rename script for instance %s on node %s"
7454 " (but the instance has been renamed in Ganeti): %s" %
7455 (inst.name, inst.primary_node, msg))
7456 self.proc.LogWarning(msg)
7458 _ShutdownInstanceDisks(self, inst)
7463 class LUInstanceRemove(LogicalUnit):
7464 """Remove an instance.
7467 HPATH = "instance-remove"
7468 HTYPE = constants.HTYPE_INSTANCE
7471 def ExpandNames(self):
7472 self._ExpandAndLockInstance()
7473 self.needed_locks[locking.LEVEL_NODE] = []
7474 self.needed_locks[locking.LEVEL_NODE_RES] = []
7475 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7477 def DeclareLocks(self, level):
7478 if level == locking.LEVEL_NODE:
7479 self._LockInstancesNodes()
7480 elif level == locking.LEVEL_NODE_RES:
7482 self.needed_locks[locking.LEVEL_NODE_RES] = \
7483 self.needed_locks[locking.LEVEL_NODE][:]
7485 def BuildHooksEnv(self):
7488 This runs on master, primary and secondary nodes of the instance.
7491 env = _BuildInstanceHookEnvByObject(self, self.instance)
7492 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7495 def BuildHooksNodes(self):
7496 """Build hooks nodes.
7499 nl = [self.cfg.GetMasterNode()]
7500 nl_post = list(self.instance.all_nodes) + nl
7501 return (nl, nl_post)
7503 def CheckPrereq(self):
7504 """Check prerequisites.
7506 This checks that the instance is in the cluster.
7509 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7510 assert self.instance is not None, \
7511 "Cannot retrieve locked instance %s" % self.op.instance_name
7513 def Exec(self, feedback_fn):
7514 """Remove the instance.
7517 instance = self.instance
7518 logging.info("Shutting down instance %s on node %s",
7519 instance.name, instance.primary_node)
7521 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7522 self.op.shutdown_timeout)
7523 msg = result.fail_msg
7525 if self.op.ignore_failures:
7526 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7528 raise errors.OpExecError("Could not shutdown instance %s on"
7530 (instance.name, instance.primary_node, msg))
7532 assert (self.owned_locks(locking.LEVEL_NODE) ==
7533 self.owned_locks(locking.LEVEL_NODE_RES))
7534 assert not (set(instance.all_nodes) -
7535 self.owned_locks(locking.LEVEL_NODE)), \
7536 "Not owning correct locks"
7538 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7541 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7542 """Utility function to remove an instance.
7545 logging.info("Removing block devices for instance %s", instance.name)
7547 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7548 if not ignore_failures:
7549 raise errors.OpExecError("Can't remove instance's disks")
7550 feedback_fn("Warning: can't remove instance's disks")
7552 logging.info("Removing instance %s out of cluster config", instance.name)
7554 lu.cfg.RemoveInstance(instance.name)
7556 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7557 "Instance lock removal conflict"
7559 # Remove lock for the instance
7560 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7563 class LUInstanceQuery(NoHooksLU):
7564 """Logical unit for querying instances.
7567 # pylint: disable=W0142
7570 def CheckArguments(self):
7571 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7572 self.op.output_fields, self.op.use_locking)
7574 def ExpandNames(self):
7575 self.iq.ExpandNames(self)
7577 def DeclareLocks(self, level):
7578 self.iq.DeclareLocks(self, level)
7580 def Exec(self, feedback_fn):
7581 return self.iq.OldStyleQuery(self)
7584 class LUInstanceFailover(LogicalUnit):
7585 """Failover an instance.
7588 HPATH = "instance-failover"
7589 HTYPE = constants.HTYPE_INSTANCE
7592 def CheckArguments(self):
7593 """Check the arguments.
7596 self.iallocator = getattr(self.op, "iallocator", None)
7597 self.target_node = getattr(self.op, "target_node", None)
7599 def ExpandNames(self):
7600 self._ExpandAndLockInstance()
7602 if self.op.target_node is not None:
7603 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7605 self.needed_locks[locking.LEVEL_NODE] = []
7606 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7608 self.needed_locks[locking.LEVEL_NODE_RES] = []
7609 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7611 ignore_consistency = self.op.ignore_consistency
7612 shutdown_timeout = self.op.shutdown_timeout
7613 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7616 ignore_consistency=ignore_consistency,
7617 shutdown_timeout=shutdown_timeout,
7618 ignore_ipolicy=self.op.ignore_ipolicy)
7619 self.tasklets = [self._migrater]
7621 def DeclareLocks(self, level):
7622 if level == locking.LEVEL_NODE:
7623 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7624 if instance.disk_template in constants.DTS_EXT_MIRROR:
7625 if self.op.target_node is None:
7626 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7628 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7629 self.op.target_node]
7630 del self.recalculate_locks[locking.LEVEL_NODE]
7632 self._LockInstancesNodes()
7633 elif level == locking.LEVEL_NODE_RES:
7635 self.needed_locks[locking.LEVEL_NODE_RES] = \
7636 self.needed_locks[locking.LEVEL_NODE][:]
7638 def BuildHooksEnv(self):
7641 This runs on master, primary and secondary nodes of the instance.
7644 instance = self._migrater.instance
7645 source_node = instance.primary_node
7646 target_node = self.op.target_node
7648 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7649 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7650 "OLD_PRIMARY": source_node,
7651 "NEW_PRIMARY": target_node,
7654 if instance.disk_template in constants.DTS_INT_MIRROR:
7655 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7656 env["NEW_SECONDARY"] = source_node
7658 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7660 env.update(_BuildInstanceHookEnvByObject(self, instance))
7664 def BuildHooksNodes(self):
7665 """Build hooks nodes.
7668 instance = self._migrater.instance
7669 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7670 return (nl, nl + [instance.primary_node])
7673 class LUInstanceMigrate(LogicalUnit):
7674 """Migrate an instance.
7676 This is migration without shutting down, compared to the failover,
7677 which is done with shutdown.
7680 HPATH = "instance-migrate"
7681 HTYPE = constants.HTYPE_INSTANCE
7684 def ExpandNames(self):
7685 self._ExpandAndLockInstance()
7687 if self.op.target_node is not None:
7688 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7690 self.needed_locks[locking.LEVEL_NODE] = []
7691 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7693 self.needed_locks[locking.LEVEL_NODE] = []
7694 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7697 TLMigrateInstance(self, self.op.instance_name,
7698 cleanup=self.op.cleanup,
7700 fallback=self.op.allow_failover,
7701 allow_runtime_changes=self.op.allow_runtime_changes,
7702 ignore_ipolicy=self.op.ignore_ipolicy)
7703 self.tasklets = [self._migrater]
7705 def DeclareLocks(self, level):
7706 if level == locking.LEVEL_NODE:
7707 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7708 if instance.disk_template in constants.DTS_EXT_MIRROR:
7709 if self.op.target_node is None:
7710 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7712 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7713 self.op.target_node]
7714 del self.recalculate_locks[locking.LEVEL_NODE]
7716 self._LockInstancesNodes()
7717 elif level == locking.LEVEL_NODE_RES:
7719 self.needed_locks[locking.LEVEL_NODE_RES] = \
7720 self.needed_locks[locking.LEVEL_NODE][:]
7722 def BuildHooksEnv(self):
7725 This runs on master, primary and secondary nodes of the instance.
7728 instance = self._migrater.instance
7729 source_node = instance.primary_node
7730 target_node = self.op.target_node
7731 env = _BuildInstanceHookEnvByObject(self, instance)
7733 "MIGRATE_LIVE": self._migrater.live,
7734 "MIGRATE_CLEANUP": self.op.cleanup,
7735 "OLD_PRIMARY": source_node,
7736 "NEW_PRIMARY": target_node,
7737 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7740 if instance.disk_template in constants.DTS_INT_MIRROR:
7741 env["OLD_SECONDARY"] = target_node
7742 env["NEW_SECONDARY"] = source_node
7744 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7748 def BuildHooksNodes(self):
7749 """Build hooks nodes.
7752 instance = self._migrater.instance
7753 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7754 return (nl, nl + [instance.primary_node])
7757 class LUInstanceMove(LogicalUnit):
7758 """Move an instance by data-copying.
7761 HPATH = "instance-move"
7762 HTYPE = constants.HTYPE_INSTANCE
7765 def ExpandNames(self):
7766 self._ExpandAndLockInstance()
7767 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7768 self.op.target_node = target_node
7769 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7770 self.needed_locks[locking.LEVEL_NODE_RES] = []
7771 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7773 def DeclareLocks(self, level):
7774 if level == locking.LEVEL_NODE:
7775 self._LockInstancesNodes(primary_only=True)
7776 elif level == locking.LEVEL_NODE_RES:
7778 self.needed_locks[locking.LEVEL_NODE_RES] = \
7779 self.needed_locks[locking.LEVEL_NODE][:]
7781 def BuildHooksEnv(self):
7784 This runs on master, primary and secondary nodes of the instance.
7788 "TARGET_NODE": self.op.target_node,
7789 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7791 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7794 def BuildHooksNodes(self):
7795 """Build hooks nodes.
7799 self.cfg.GetMasterNode(),
7800 self.instance.primary_node,
7801 self.op.target_node,
7805 def CheckPrereq(self):
7806 """Check prerequisites.
7808 This checks that the instance is in the cluster.
7811 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7812 assert self.instance is not None, \
7813 "Cannot retrieve locked instance %s" % self.op.instance_name
7815 node = self.cfg.GetNodeInfo(self.op.target_node)
7816 assert node is not None, \
7817 "Cannot retrieve locked node %s" % self.op.target_node
7819 self.target_node = target_node = node.name
7821 if target_node == instance.primary_node:
7822 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7823 (instance.name, target_node),
7826 bep = self.cfg.GetClusterInfo().FillBE(instance)
7828 for idx, dsk in enumerate(instance.disks):
7829 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7830 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7831 " cannot copy" % idx, errors.ECODE_STATE)
7833 _CheckNodeOnline(self, target_node)
7834 _CheckNodeNotDrained(self, target_node)
7835 _CheckNodeVmCapable(self, target_node)
7836 cluster = self.cfg.GetClusterInfo()
7837 group_info = self.cfg.GetNodeGroup(node.group)
7838 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7839 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7840 ignore=self.op.ignore_ipolicy)
7842 if instance.admin_state == constants.ADMINST_UP:
7843 # check memory requirements on the target node
7844 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7845 instance.name, bep[constants.BE_MAXMEM],
7846 instance.hypervisor)
7848 self.LogInfo("Not checking memory on the target node as"
7849 " instance will not be started")
7851 # check bridge existence
7852 _CheckInstanceBridgesExist(self, instance, node=target_node)
7854 def Exec(self, feedback_fn):
7855 """Move an instance.
7857 The move is done by shutting it down on its present node, copying
7858 the data over (slow) and starting it on the new node.
7861 instance = self.instance
7863 source_node = instance.primary_node
7864 target_node = self.target_node
7866 self.LogInfo("Shutting down instance %s on source node %s",
7867 instance.name, source_node)
7869 assert (self.owned_locks(locking.LEVEL_NODE) ==
7870 self.owned_locks(locking.LEVEL_NODE_RES))
7872 result = self.rpc.call_instance_shutdown(source_node, instance,
7873 self.op.shutdown_timeout)
7874 msg = result.fail_msg
7876 if self.op.ignore_consistency:
7877 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7878 " Proceeding anyway. Please make sure node"
7879 " %s is down. Error details: %s",
7880 instance.name, source_node, source_node, msg)
7882 raise errors.OpExecError("Could not shutdown instance %s on"
7884 (instance.name, source_node, msg))
7886 # create the target disks
7888 _CreateDisks(self, instance, target_node=target_node)
7889 except errors.OpExecError:
7890 self.LogWarning("Device creation failed, reverting...")
7892 _RemoveDisks(self, instance, target_node=target_node)
7894 self.cfg.ReleaseDRBDMinors(instance.name)
7897 cluster_name = self.cfg.GetClusterInfo().cluster_name
7900 # activate, get path, copy the data over
7901 for idx, disk in enumerate(instance.disks):
7902 self.LogInfo("Copying data for disk %d", idx)
7903 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7904 instance.name, True, idx)
7906 self.LogWarning("Can't assemble newly created disk %d: %s",
7907 idx, result.fail_msg)
7908 errs.append(result.fail_msg)
7910 dev_path = result.payload
7911 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7912 target_node, dev_path,
7915 self.LogWarning("Can't copy data over for disk %d: %s",
7916 idx, result.fail_msg)
7917 errs.append(result.fail_msg)
7921 self.LogWarning("Some disks failed to copy, aborting")
7923 _RemoveDisks(self, instance, target_node=target_node)
7925 self.cfg.ReleaseDRBDMinors(instance.name)
7926 raise errors.OpExecError("Errors during disk copy: %s" %
7929 instance.primary_node = target_node
7930 self.cfg.Update(instance, feedback_fn)
7932 self.LogInfo("Removing the disks on the original node")
7933 _RemoveDisks(self, instance, target_node=source_node)
7935 # Only start the instance if it's marked as up
7936 if instance.admin_state == constants.ADMINST_UP:
7937 self.LogInfo("Starting instance %s on node %s",
7938 instance.name, target_node)
7940 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7941 ignore_secondaries=True)
7943 _ShutdownInstanceDisks(self, instance)
7944 raise errors.OpExecError("Can't activate the instance's disks")
7946 result = self.rpc.call_instance_start(target_node,
7947 (instance, None, None), False)
7948 msg = result.fail_msg
7950 _ShutdownInstanceDisks(self, instance)
7951 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7952 (instance.name, target_node, msg))
7955 class LUNodeMigrate(LogicalUnit):
7956 """Migrate all instances from a node.
7959 HPATH = "node-migrate"
7960 HTYPE = constants.HTYPE_NODE
7963 def CheckArguments(self):
7966 def ExpandNames(self):
7967 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7969 self.share_locks = _ShareAll()
7970 self.needed_locks = {
7971 locking.LEVEL_NODE: [self.op.node_name],
7974 def BuildHooksEnv(self):
7977 This runs on the master, the primary and all the secondaries.
7981 "NODE_NAME": self.op.node_name,
7982 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7985 def BuildHooksNodes(self):
7986 """Build hooks nodes.
7989 nl = [self.cfg.GetMasterNode()]
7992 def CheckPrereq(self):
7995 def Exec(self, feedback_fn):
7996 # Prepare jobs for migration instances
7997 allow_runtime_changes = self.op.allow_runtime_changes
7999 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8002 iallocator=self.op.iallocator,
8003 target_node=self.op.target_node,
8004 allow_runtime_changes=allow_runtime_changes,
8005 ignore_ipolicy=self.op.ignore_ipolicy)]
8006 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8009 # TODO: Run iallocator in this opcode and pass correct placement options to
8010 # OpInstanceMigrate. Since other jobs can modify the cluster between
8011 # running the iallocator and the actual migration, a good consistency model
8012 # will have to be found.
8014 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8015 frozenset([self.op.node_name]))
8017 return ResultWithJobs(jobs)
8020 class TLMigrateInstance(Tasklet):
8021 """Tasklet class for instance migration.
8024 @ivar live: whether the migration will be done live or non-live;
8025 this variable is initialized only after CheckPrereq has run
8026 @type cleanup: boolean
8027 @ivar cleanup: Whether we are cleaning up after a failed migration
8028 @type iallocator: string
8029 @ivar iallocator: The iallocator used to determine target_node
8030 @type target_node: string
8031 @ivar target_node: If given, the target_node to reallocate the instance to
8032 @type failover: boolean
8033 @ivar failover: Whether operation results in failover or migration
8034 @type fallback: boolean
8035 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
8037 @type ignore_consistency: boolean
8038 @ivar ignore_consistency: Whether we should ignore consistency between source
8040 @type shutdown_timeout: int
8041 @ivar shutdown_timeout: In case of failover, the timeout used for the shutdown
8042 @type ignore_ipolicy: bool
8043 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8048 _MIGRATION_POLL_INTERVAL = 1 # seconds
8049 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8051 def __init__(self, lu, instance_name, cleanup=False,
8052 failover=False, fallback=False,
8053 ignore_consistency=False,
8054 allow_runtime_changes=True,
8055 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8056 ignore_ipolicy=False):
8057 """Initializes this class.
8060 Tasklet.__init__(self, lu)
8063 self.instance_name = instance_name
8064 self.cleanup = cleanup
8065 self.live = False # will be overridden later
8066 self.failover = failover
8067 self.fallback = fallback
8068 self.ignore_consistency = ignore_consistency
8069 self.shutdown_timeout = shutdown_timeout
8070 self.ignore_ipolicy = ignore_ipolicy
8071 self.allow_runtime_changes = allow_runtime_changes
8073 def CheckPrereq(self):
8074 """Check prerequisites.
8076 This checks that the instance is in the cluster.
8079 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8080 instance = self.cfg.GetInstanceInfo(instance_name)
8081 assert instance is not None
8082 self.instance = instance
8083 cluster = self.cfg.GetClusterInfo()
8085 if (not self.cleanup and
8086 not instance.admin_state == constants.ADMINST_UP and
8087 not self.failover and self.fallback):
8088 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8089 " switching to failover")
8090 self.failover = True
8092 if instance.disk_template not in constants.DTS_MIRRORED:
8097 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8098 " %s" % (instance.disk_template, text),
8101 if instance.disk_template in constants.DTS_EXT_MIRROR:
8102 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8104 if self.lu.op.iallocator:
8105 self._RunAllocator()
8107 # We set self.target_node as it is required by
8109 self.target_node = self.lu.op.target_node
8111 # Check that the target node is correct in terms of instance policy
8112 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8113 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8114 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8116 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8117 ignore=self.ignore_ipolicy)
8119 # self.target_node is already populated, either directly or by the
8121 target_node = self.target_node
8122 if self.target_node == instance.primary_node:
8123 raise errors.OpPrereqError("Cannot migrate instance %s"
8124 " to its primary (%s)" %
8125 (instance.name, instance.primary_node),
8128 if len(self.lu.tasklets) == 1:
8129 # It is safe to release locks only when we're the only tasklet
8131 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8132 keep=[instance.primary_node, self.target_node])
8135 secondary_nodes = instance.secondary_nodes
8136 if not secondary_nodes:
8137 raise errors.ConfigurationError("No secondary node but using"
8138 " %s disk template" %
8139 instance.disk_template)
8140 target_node = secondary_nodes[0]
8141 if self.lu.op.iallocator or (self.lu.op.target_node and
8142 self.lu.op.target_node != target_node):
8144 text = "failed over"
8147 raise errors.OpPrereqError("Instances with disk template %s cannot"
8148 " be %s to arbitrary nodes"
8149 " (neither an iallocator nor a target"
8150 " node can be passed)" %
8151 (instance.disk_template, text),
8153 nodeinfo = self.cfg.GetNodeInfo(target_node)
8154 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8155 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8157 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8158 ignore=self.ignore_ipolicy)
8160 i_be = cluster.FillBE(instance)
8162 # check memory requirements on the secondary node
8163 if (not self.cleanup and
8164 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8165 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8166 "migrating instance %s" %
8168 i_be[constants.BE_MINMEM],
8169 instance.hypervisor)
8171 self.lu.LogInfo("Not checking memory on the secondary node as"
8172 " instance will not be started")
8174 # check if failover must be forced instead of migration
8175 if (not self.cleanup and not self.failover and
8176 i_be[constants.BE_ALWAYS_FAILOVER]):
8178 self.lu.LogInfo("Instance configured to always failover; fallback"
8180 self.failover = True
8182 raise errors.OpPrereqError("This instance has been configured to"
8183 " always failover, please allow failover",
8186 # check bridge existence
8187 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8189 if not self.cleanup:
8190 _CheckNodeNotDrained(self.lu, target_node)
8191 if not self.failover:
8192 result = self.rpc.call_instance_migratable(instance.primary_node,
8194 if result.fail_msg and self.fallback:
8195 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8197 self.failover = True
8199 result.Raise("Can't migrate, please use failover",
8200 prereq=True, ecode=errors.ECODE_STATE)
8202 assert not (self.failover and self.cleanup)
8204 if not self.failover:
8205 if self.lu.op.live is not None and self.lu.op.mode is not None:
8206 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8207 " parameters are accepted",
8209 if self.lu.op.live is not None:
8211 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8213 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8214 # reset the 'live' parameter to None so that repeated
8215 # invocations of CheckPrereq do not raise an exception
8216 self.lu.op.live = None
8217 elif self.lu.op.mode is None:
8218 # read the default value from the hypervisor
8219 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8220 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8222 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8224 # Failover is never live
8227 if not (self.failover or self.cleanup):
8228 remote_info = self.rpc.call_instance_info(instance.primary_node,
8230 instance.hypervisor)
8231 remote_info.Raise("Error checking instance on node %s" %
8232 instance.primary_node)
8233 instance_running = bool(remote_info.payload)
8234 if instance_running:
8235 self.current_mem = int(remote_info.payload["memory"])
8237 def _RunAllocator(self):
8238 """Run the allocator based on input opcode.
8241 # FIXME: add a self.ignore_ipolicy option
8242 req = iallocator.IAReqRelocate(name=self.instance_name,
8243 relocate_from=[self.instance.primary_node])
8244 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8246 ial.Run(self.lu.op.iallocator)
8249 raise errors.OpPrereqError("Can't compute nodes using"
8250 " iallocator '%s': %s" %
8251 (self.lu.op.iallocator, ial.info),
8253 self.target_node = ial.result[0]
8254 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8255 self.instance_name, self.lu.op.iallocator,
8256 utils.CommaJoin(ial.result))
8258 def _WaitUntilSync(self):
8259 """Poll with custom rpc for disk sync.
8261 This uses our own step-based rpc call.
8264 self.feedback_fn("* wait until resync is done")
8268 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8270 (self.instance.disks,
8273 for node, nres in result.items():
8274 nres.Raise("Cannot resync disks on node %s" % node)
8275 node_done, node_percent = nres.payload
8276 all_done = all_done and node_done
8277 if node_percent is not None:
8278 min_percent = min(min_percent, node_percent)
8280 if min_percent < 100:
8281 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8284 def _EnsureSecondary(self, node):
8285 """Demote a node to secondary.
8288 self.feedback_fn("* switching node %s to secondary mode" % node)
8290 for dev in self.instance.disks:
8291 self.cfg.SetDiskID(dev, node)
8293 result = self.rpc.call_blockdev_close(node, self.instance.name,
8294 self.instance.disks)
8295 result.Raise("Cannot change disk to secondary on node %s" % node)
8297 def _GoStandalone(self):
8298 """Disconnect from the network.
8301 self.feedback_fn("* changing into standalone mode")
8302 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8303 self.instance.disks)
8304 for node, nres in result.items():
8305 nres.Raise("Cannot disconnect disks node %s" % node)
8307 def _GoReconnect(self, multimaster):
8308 """Reconnect to the network.
8314 msg = "single-master"
8315 self.feedback_fn("* changing disks into %s mode" % msg)
8316 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8317 (self.instance.disks, self.instance),
8318 self.instance.name, multimaster)
8319 for node, nres in result.items():
8320 nres.Raise("Cannot change disks config on node %s" % node)
8322 def _ExecCleanup(self):
8323 """Try to cleanup after a failed migration.
8325 The cleanup is done by:
8326 - check that the instance is running only on one node
8327 (and update the config if needed)
8328 - change disks on its secondary node to secondary
8329 - wait until disks are fully synchronized
8330 - disconnect from the network
8331 - change disks into single-master mode
8332 - wait again until disks are fully synchronized
8335 instance = self.instance
8336 target_node = self.target_node
8337 source_node = self.source_node
8339 # check running on only one node
8340 self.feedback_fn("* checking where the instance actually runs"
8341 " (if this hangs, the hypervisor might be in"
8343 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8344 for node, result in ins_l.items():
8345 result.Raise("Can't contact node %s" % node)
8347 runningon_source = instance.name in ins_l[source_node].payload
8348 runningon_target = instance.name in ins_l[target_node].payload
8350 if runningon_source and runningon_target:
8351 raise errors.OpExecError("Instance seems to be running on two nodes,"
8352 " or the hypervisor is confused; you will have"
8353 " to ensure manually that it runs only on one"
8354 " and restart this operation")
8356 if not (runningon_source or runningon_target):
8357 raise errors.OpExecError("Instance does not seem to be running at all;"
8358 " in this case it's safer to repair by"
8359 " running 'gnt-instance stop' to ensure disk"
8360 " shutdown, and then restarting it")
8362 if runningon_target:
8363 # the migration has actually succeeded, we need to update the config
8364 self.feedback_fn("* instance running on secondary node (%s),"
8365 " updating config" % target_node)
8366 instance.primary_node = target_node
8367 self.cfg.Update(instance, self.feedback_fn)
8368 demoted_node = source_node
8370 self.feedback_fn("* instance confirmed to be running on its"
8371 " primary node (%s)" % source_node)
8372 demoted_node = target_node
8374 if instance.disk_template in constants.DTS_INT_MIRROR:
8375 self._EnsureSecondary(demoted_node)
8377 self._WaitUntilSync()
8378 except errors.OpExecError:
8379 # we ignore errors here, since if the device is standalone, it
8380 # won't be able to sync
8382 self._GoStandalone()
8383 self._GoReconnect(False)
8384 self._WaitUntilSync()
8386 self.feedback_fn("* done")
8388 def _RevertDiskStatus(self):
8389 """Try to revert the disk status after a failed migration.
8392 target_node = self.target_node
8393 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8397 self._EnsureSecondary(target_node)
8398 self._GoStandalone()
8399 self._GoReconnect(False)
8400 self._WaitUntilSync()
8401 except errors.OpExecError, err:
8402 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8403 " please try to recover the instance manually;"
8404 " error '%s'" % str(err))
8406 def _AbortMigration(self):
8407 """Call the hypervisor code to abort a started migration.
8410 instance = self.instance
8411 target_node = self.target_node
8412 source_node = self.source_node
8413 migration_info = self.migration_info
8415 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8419 abort_msg = abort_result.fail_msg
8421 logging.error("Aborting migration failed on target node %s: %s",
8422 target_node, abort_msg)
8423 # Don't raise an exception here, as we still have to try to revert the
8424 # disk status, even if this step failed.
8426 abort_result = self.rpc.call_instance_finalize_migration_src(
8427 source_node, instance, False, self.live)
8428 abort_msg = abort_result.fail_msg
8430 logging.error("Aborting migration failed on source node %s: %s",
8431 source_node, abort_msg)
8433 def _ExecMigration(self):
8434 """Migrate an instance.
8436 The migrate is done by:
8437 - change the disks into dual-master mode
8438 - wait until disks are fully synchronized again
8439 - migrate the instance
8440 - change disks on the new secondary node (the old primary) to secondary
8441 - wait until disks are fully synchronized
8442 - change disks into single-master mode
8445 instance = self.instance
8446 target_node = self.target_node
8447 source_node = self.source_node
8449 # Check for hypervisor version mismatch and warn the user.
8450 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8451 None, [self.instance.hypervisor])
8452 for ninfo in nodeinfo.values():
8453 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8455 (_, _, (src_info, )) = nodeinfo[source_node].payload
8456 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8458 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8459 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8460 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8461 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8462 if src_version != dst_version:
8463 self.feedback_fn("* warning: hypervisor version mismatch between"
8464 " source (%s) and target (%s) node" %
8465 (src_version, dst_version))
8467 self.feedback_fn("* checking disk consistency between source and target")
8468 for (idx, dev) in enumerate(instance.disks):
8469 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8470 raise errors.OpExecError("Disk %s is degraded or not fully"
8471 " synchronized on target node,"
8472 " aborting migration" % idx)
8474 if self.current_mem > self.tgt_free_mem:
8475 if not self.allow_runtime_changes:
8476 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8477 " free memory to fit instance %s on target"
8478 " node %s (have %dMB, need %dMB)" %
8479 (instance.name, target_node,
8480 self.tgt_free_mem, self.current_mem))
8481 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8482 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8485 rpcres.Raise("Cannot modify instance runtime memory")
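# Illustrative scenario for the ballooning step above (hypothetical numbers,
# not part of the original module): if the instance currently uses 2048 MiB
# on the source node but only 1536 MiB are free on the target, and runtime
# changes are allowed, the instance memory is reduced to 1536 MiB here so
# that the live migration can proceed; otherwise the OpExecError above is
# raised.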
8487 # First get the migration information from the remote node
8488 result = self.rpc.call_migration_info(source_node, instance)
8489 msg = result.fail_msg
8491 log_err = ("Failed fetching source migration information from %s: %s" %
8493 logging.error(log_err)
8494 raise errors.OpExecError(log_err)
8496 self.migration_info = migration_info = result.payload
8498 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8499 # Then switch the disks to master/master mode
8500 self._EnsureSecondary(target_node)
8501 self._GoStandalone()
8502 self._GoReconnect(True)
8503 self._WaitUntilSync()
8505 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8506 result = self.rpc.call_accept_instance(target_node,
8509 self.nodes_ip[target_node])
8511 msg = result.fail_msg
8513 logging.error("Instance pre-migration failed, trying to revert"
8514 " disk status: %s", msg)
8515 self.feedback_fn("Pre-migration failed, aborting")
8516 self._AbortMigration()
8517 self._RevertDiskStatus()
8518 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8519 (instance.name, msg))
8521 self.feedback_fn("* migrating instance to %s" % target_node)
8522 result = self.rpc.call_instance_migrate(source_node, instance,
8523 self.nodes_ip[target_node],
8525 msg = result.fail_msg
8527 logging.error("Instance migration failed, trying to revert"
8528 " disk status: %s", msg)
8529 self.feedback_fn("Migration failed, aborting")
8530 self._AbortMigration()
8531 self._RevertDiskStatus()
8532 raise errors.OpExecError("Could not migrate instance %s: %s" %
8533 (instance.name, msg))
8535 self.feedback_fn("* starting memory transfer")
8536 last_feedback = time.time()
8538 result = self.rpc.call_instance_get_migration_status(source_node,
8540 msg = result.fail_msg
8541 ms = result.payload # MigrationStatus instance
8542 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8543 logging.error("Instance migration failed, trying to revert"
8544 " disk status: %s", msg)
8545 self.feedback_fn("Migration failed, aborting")
8546 self._AbortMigration()
8547 self._RevertDiskStatus()
8548 raise errors.OpExecError("Could not migrate instance %s: %s" %
8549 (instance.name, msg))
8551 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8552 self.feedback_fn("* memory transfer complete")
8555 if (utils.TimeoutExpired(last_feedback,
8556 self._MIGRATION_FEEDBACK_INTERVAL) and
8557 ms.transferred_ram is not None):
8558 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8559 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8560 last_feedback = time.time()
8562 time.sleep(self._MIGRATION_POLL_INTERVAL)
8564 result = self.rpc.call_instance_finalize_migration_src(source_node,
8568 msg = result.fail_msg
8570 logging.error("Instance migration succeeded, but finalization failed"
8571 " on the source node: %s", msg)
8572 raise errors.OpExecError("Could not finalize instance migration: %s" %
8575 instance.primary_node = target_node
8577 # distribute new instance config to the other nodes
8578 self.cfg.Update(instance, self.feedback_fn)
8580 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8584 msg = result.fail_msg
8586 logging.error("Instance migration succeeded, but finalization failed"
8587 " on the target node: %s", msg)
8588 raise errors.OpExecError("Could not finalize instance migration: %s" %
8591 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8592 self._EnsureSecondary(source_node)
8593 self._WaitUntilSync()
8594 self._GoStandalone()
8595 self._GoReconnect(False)
8596 self._WaitUntilSync()
8598 # If the instance's disk template is `rbd' and there was a successful
8599 # migration, unmap the device from the source node.
8600 if self.instance.disk_template == constants.DT_RBD:
8601 disks = _ExpandCheckDisks(instance, instance.disks)
8602 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8604 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8605 msg = result.fail_msg
8607 logging.error("Migration was successful, but couldn't unmap the"
8608 " block device %s on source node %s: %s",
8609 disk.iv_name, source_node, msg)
8610 logging.error("You need to unmap the device %s manually on %s",
8611 disk.iv_name, source_node)
8613 self.feedback_fn("* done")
8615 def _ExecFailover(self):
8616 """Failover an instance.
8618 The failover is done by shutting it down on its present node and
8619 starting it on the secondary.
8622 instance = self.instance
8623 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8625 source_node = instance.primary_node
8626 target_node = self.target_node
8628 if instance.admin_state == constants.ADMINST_UP:
8629 self.feedback_fn("* checking disk consistency between source and target")
8630 for (idx, dev) in enumerate(instance.disks):
8631 # for drbd, these are drbd over lvm
8632 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8634 if primary_node.offline:
8635 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8637 (primary_node.name, idx, target_node))
8638 elif not self.ignore_consistency:
8639 raise errors.OpExecError("Disk %s is degraded on target node,"
8640 " aborting failover" % idx)
8642 self.feedback_fn("* not checking disk consistency as instance is not"
8645 self.feedback_fn("* shutting down instance on source node")
8646 logging.info("Shutting down instance %s on node %s",
8647 instance.name, source_node)
8649 result = self.rpc.call_instance_shutdown(source_node, instance,
8650 self.shutdown_timeout)
8651 msg = result.fail_msg
8653 if self.ignore_consistency or primary_node.offline:
8654 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8655 " proceeding anyway; please make sure node"
8656 " %s is down; error details: %s",
8657 instance.name, source_node, source_node, msg)
8659 raise errors.OpExecError("Could not shutdown instance %s on"
8661 (instance.name, source_node, msg))
8663 self.feedback_fn("* deactivating the instance's disks on source node")
8664 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8665 raise errors.OpExecError("Can't shut down the instance's disks")
8667 instance.primary_node = target_node
8668 # distribute new instance config to the other nodes
8669 self.cfg.Update(instance, self.feedback_fn)
8671 # Only start the instance if it's marked as up
8672 if instance.admin_state == constants.ADMINST_UP:
8673 self.feedback_fn("* activating the instance's disks on target node %s" %
8675 logging.info("Starting instance %s on node %s",
8676 instance.name, target_node)
8678 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8679 ignore_secondaries=True)
8681 _ShutdownInstanceDisks(self.lu, instance)
8682 raise errors.OpExecError("Can't activate the instance's disks")
8684 self.feedback_fn("* starting the instance on the target node %s" %
8686 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8688 msg = result.fail_msg
8690 _ShutdownInstanceDisks(self.lu, instance)
8691 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8692 (instance.name, target_node, msg))
8694 def Exec(self, feedback_fn):
8695 """Perform the migration.
8698 self.feedback_fn = feedback_fn
8699 self.source_node = self.instance.primary_node
8701 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8702 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8703 self.target_node = self.instance.secondary_nodes[0]
8704 # Otherwise self.target_node has been populated either
8705 # directly, or through an iallocator.
8707 self.all_nodes = [self.source_node, self.target_node]
8708 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8709 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8712 feedback_fn("Failover instance %s" % self.instance.name)
8713 self._ExecFailover()
8715 feedback_fn("Migrating instance %s" % self.instance.name)
8718 return self._ExecCleanup()
8720 return self._ExecMigration()
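# For example, a plain "gnt-instance migrate inst1.example.com" (hypothetical
# instance name) reaches this point with cleanup=False and failover=False, so
# _ExecMigration drives the live-migration path; "gnt-instance migrate
# --cleanup" instead ends up in _ExecCleanup to repair a previously failed
# migration.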
8723 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8725 """Wrapper around L{_CreateBlockDevInner}.
8727 This method annotates the root device first.
8730 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8731 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8735 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8737 """Create a tree of block devices on a given node.
8739 If this device type has to be created on secondaries, create it and all its children.
8742 If not, just recurse to children keeping the same 'force' value.
8744 @attention: The device has to be annotated already.
8746 @param lu: the lu on whose behalf we execute
8747 @param node: the node on which to create the device
8748 @type instance: L{objects.Instance}
8749 @param instance: the instance which owns the device
8750 @type device: L{objects.Disk}
8751 @param device: the device to create
8752 @type force_create: boolean
8753 @param force_create: whether to force creation of this device; this
8754 will be changed to True whenever we find a device which has
8755 CreateOnSecondary() attribute
8756 @param info: the extra 'metadata' we should attach to the device
8757 (this will be represented as a LVM tag)
8758 @type force_open: boolean
8759 @param force_open: this parameter will be passed to the
8760 L{backend.BlockdevCreate} function where it specifies
8761 whether we run on primary or not, and it affects both
8762 the child assembly and the device's own Open() execution
8765 if device.CreateOnSecondary():
8769 for child in device.children:
8770 _CreateBlockDevInner(lu, node, instance, child, force_create,
8773 if not force_create:
8776 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8779 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8780 """Create a single block device on a given node.
8782 This will not recurse over children of the device, so they must be created in advance.
8785 @param lu: the lu on whose behalf we execute
8786 @param node: the node on which to create the device
8787 @type instance: L{objects.Instance}
8788 @param instance: the instance which owns the device
8789 @type device: L{objects.Disk}
8790 @param device: the device to create
8791 @param info: the extra 'metadata' we should attach to the device
8792 (this will be represented as a LVM tag)
8793 @type force_open: boolean
8794 @param force_open: this parameter will be passed to the
8795 L{backend.BlockdevCreate} function where it specifies
8796 whether we run on primary or not, and it affects both
8797 the child assembly and the device's own Open() execution
8800 lu.cfg.SetDiskID(device, node)
8801 result = lu.rpc.call_blockdev_create(node, device, device.size,
8802 instance.name, force_open, info)
8803 result.Raise("Can't create block device %s on"
8804 " node %s for instance %s" % (device, node, instance.name))
8805 if device.physical_id is None:
8806 device.physical_id = result.payload
8809 def _GenerateUniqueNames(lu, exts):
8810 """Generate a suitable LV name.
8812 This will generate a logical volume name for the given instance.
8817 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8818 results.append("%s%s" % (new_id, val))
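# Illustrative result (UUID made up): _GenerateUniqueNames(lu, [".disk0"])
# would return something like
# ["d0a5f7a2-3c42-4f21-9bd6-1f7e3c9a1b2d.disk0"], i.e. a cluster-unique ID
# with each requested suffix appended.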
8822 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8823 iv_name, p_minor, s_minor):
8824 """Generate a drbd8 device complete with its children.
8827 assert len(vgnames) == len(names) == 2
8828 port = lu.cfg.AllocatePort()
8829 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8831 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8832 logical_id=(vgnames[0], names[0]),
8834 dev_meta = objects.Disk(dev_type=constants.LD_LV,
8835 size=constants.DRBD_META_SIZE,
8836 logical_id=(vgnames[1], names[1]),
8838 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8839 logical_id=(primary, secondary, port,
8842 children=[dev_data, dev_meta],
8843 iv_name=iv_name, params={})
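# The result is a small device tree: an LD_DRBD8 disk whose children are the
# data LV and a DRBD_META_SIZE metadata LV; its (partially elided) logical_id
# carries the primary/secondary node pair, the allocated port, both minors
# and the shared secret needed to bring the device up on either node.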
8847 _DISK_TEMPLATE_NAME_PREFIX = {
8848 constants.DT_PLAIN: "",
8849 constants.DT_RBD: ".rbd",
8853 _DISK_TEMPLATE_DEVICE_TYPE = {
8854 constants.DT_PLAIN: constants.LD_LV,
8855 constants.DT_FILE: constants.LD_FILE,
8856 constants.DT_SHARED_FILE: constants.LD_FILE,
8857 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8858 constants.DT_RBD: constants.LD_RBD,
8862 def _GenerateDiskTemplate(
8863 lu, template_name, instance_name, primary_node, secondary_nodes,
8864 disk_info, file_storage_dir, file_driver, base_index,
8865 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8866 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8867 """Generate the entire disk layout for a given template type.
8870 # TODO: compute space requirements
8872 vgname = lu.cfg.GetVGName()
8873 disk_count = len(disk_info)
8876 if template_name == constants.DT_DISKLESS:
8878 elif template_name == constants.DT_DRBD8:
8879 if len(secondary_nodes) != 1:
8880 raise errors.ProgrammerError("Wrong template configuration")
8881 remote_node = secondary_nodes[0]
8882 minors = lu.cfg.AllocateDRBDMinor(
8883 [primary_node, remote_node] * len(disk_info), instance_name)
8885 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8887 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8890 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8891 for i in range(disk_count)]):
8892 names.append(lv_prefix + "_data")
8893 names.append(lv_prefix + "_meta")
8894 for idx, disk in enumerate(disk_info):
8895 disk_index = idx + base_index
8896 data_vg = disk.get(constants.IDISK_VG, vgname)
8897 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8898 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8899 disk[constants.IDISK_SIZE],
8901 names[idx * 2:idx * 2 + 2],
8902 "disk/%d" % disk_index,
8903 minors[idx * 2], minors[idx * 2 + 1])
8904 disk_dev.mode = disk[constants.IDISK_MODE]
8905 disks.append(disk_dev)
8908 raise errors.ProgrammerError("Wrong template configuration")
8910 if template_name == constants.DT_FILE:
8912 elif template_name == constants.DT_SHARED_FILE:
8913 _req_shr_file_storage()
8915 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8916 if name_prefix is None:
8919 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8920 (name_prefix, base_index + i)
8921 for i in range(disk_count)])
8923 if template_name == constants.DT_PLAIN:
8924 def logical_id_fn(idx, _, disk):
8925 vg = disk.get(constants.IDISK_VG, vgname)
8926 return (vg, names[idx])
8927 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8929 lambda _, disk_index, disk: (file_driver,
8930 "%s/disk%d" % (file_storage_dir,
8932 elif template_name == constants.DT_BLOCK:
8934 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8935 disk[constants.IDISK_ADOPT])
8936 elif template_name == constants.DT_RBD:
8937 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8939 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8941 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8943 for idx, disk in enumerate(disk_info):
8944 disk_index = idx + base_index
8945 size = disk[constants.IDISK_SIZE]
8946 feedback_fn("* disk %s, size %s" %
8947 (disk_index, utils.FormatUnit(size, "h")))
8948 disks.append(objects.Disk(dev_type=dev_type, size=size,
8949 logical_id=logical_id_fn(idx, disk_index, disk),
8950 iv_name="disk/%d" % disk_index,
8951 mode=disk[constants.IDISK_MODE],
8957 def _GetInstanceInfoText(instance):
8958 """Compute the text that should be added to the disk's metadata.
8961 return "originstname+%s" % instance.name
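# For example, an instance named "web1.example.com" (hypothetical name) gets
# its volumes tagged with "originstname+web1.example.com".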
8964 def _CalcEta(time_taken, written, total_size):
8965 """Calculates the ETA based on size written and total size.
8967 @param time_taken: The time taken so far
8968 @param written: amount written so far
8969 @param total_size: The total size of data to be written
8970 @return: The remaining time in seconds
8973 avg_time = time_taken / float(written)
8974 return (total_size - written) * avg_time
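# Worked example of the formula above (hypothetical numbers): with 512 MiB
# written after 60 seconds out of a 2048 MiB total, the average time per
# unit is 60 / 512.0, so the remaining time is
# (2048 - 512) * (60 / 512.0) == 180.0 seconds.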
8977 def _WipeDisks(lu, instance):
8978 """Wipes instance disks.
8980 @type lu: L{LogicalUnit}
8981 @param lu: the logical unit on whose behalf we execute
8982 @type instance: L{objects.Instance}
8983 @param instance: the instance whose disks we should create
8984 @return: the success of the wipe
8987 node = instance.primary_node
8989 for device in instance.disks:
8990 lu.cfg.SetDiskID(device, node)
8992 logging.info("Pause sync of instance %s disks", instance.name)
8993 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8994 (instance.disks, instance),
8996 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8998 for idx, success in enumerate(result.payload):
9000 logging.warn("pause-sync of instance %s for disk %d failed",
9004 for idx, device in enumerate(instance.disks):
9005 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
9006 # but at most MAX_WIPE_CHUNK
9007 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
9008 constants.MIN_WIPE_CHUNK_PERCENT)
9009 # we _must_ make this an int, otherwise rounding errors will
9011 wipe_chunk_size = int(wipe_chunk_size)
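# Worked example for the chunk size above (hypothetical disk sizes, assuming
# the usual defaults of MIN_WIPE_CHUNK_PERCENT == 10 and MAX_WIPE_CHUNK ==
# 1024 MiB): a 2048 MiB disk is wiped in chunks of
# int(min(1024, 2048 / 100.0 * 10)) == 204 MiB, while any disk larger than
# 10240 MiB is capped at 1024 MiB per chunk.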
9013 lu.LogInfo("* Wiping disk %d", idx)
9014 logging.info("Wiping disk %d for instance %s, node %s using"
9015 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9020 start_time = time.time()
9022 while offset < size:
9023 wipe_size = min(wipe_chunk_size, size - offset)
9024 logging.debug("Wiping disk %d, offset %s, chunk %s",
9025 idx, offset, wipe_size)
9026 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9028 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9029 (idx, offset, wipe_size))
9032 if now - last_output >= 60:
9033 eta = _CalcEta(now - start_time, offset, size)
9034 lu.LogInfo(" - done: %.1f%% ETA: %s" %
9035 (offset / float(size) * 100, utils.FormatSeconds(eta)))
9038 logging.info("Resume sync of instance %s disks", instance.name)
9040 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9041 (instance.disks, instance),
9045 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
9046 " please have a look at the status and troubleshoot"
9047 " the issue: %s", node, result.fail_msg)
9049 for idx, success in enumerate(result.payload):
9051 lu.LogWarning("Resume sync of disk %d failed, please have a"
9052 " look at the status and troubleshoot the issue", idx)
9053 logging.warn("resume-sync of instance %s for disk %d failed",
9057 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9058 """Create all disks for an instance.
9060 This abstracts away some work from AddInstance.
9062 @type lu: L{LogicalUnit}
9063 @param lu: the logical unit on whose behalf we execute
9064 @type instance: L{objects.Instance}
9065 @param instance: the instance whose disks we should create
9067 @param to_skip: list of indices to skip
9068 @type target_node: string
9069 @param target_node: if passed, overrides the target node for creation
9071 @return: the success of the creation
9074 info = _GetInstanceInfoText(instance)
9075 if target_node is None:
9076 pnode = instance.primary_node
9077 all_nodes = instance.all_nodes
9082 if instance.disk_template in constants.DTS_FILEBASED:
9083 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9084 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9086 result.Raise("Failed to create directory '%s' on"
9087 " node %s" % (file_storage_dir, pnode))
9089 # Note: this needs to be kept in sync with adding of disks in
9090 # LUInstanceSetParams
9091 for idx, device in enumerate(instance.disks):
9092 if to_skip and idx in to_skip:
9094 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9096 for node in all_nodes:
9097 f_create = node == pnode
9098 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9101 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9102 """Remove all disks for an instance.
9104 This abstracts away some work from `AddInstance()` and
9105 `RemoveInstance()`. Note that in case some of the devices couldn't
9106 be removed, the removal will continue with the other ones (compare
9107 with `_CreateDisks()`).
9109 @type lu: L{LogicalUnit}
9110 @param lu: the logical unit on whose behalf we execute
9111 @type instance: L{objects.Instance}
9112 @param instance: the instance whose disks we should remove
9113 @type target_node: string
9114 @param target_node: used to override the node on which to remove the disks
9116 @return: the success of the removal
9119 logging.info("Removing block devices for instance %s", instance.name)
9122 ports_to_release = set()
9123 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9124 for (idx, device) in enumerate(anno_disks):
9126 edata = [(target_node, device)]
9128 edata = device.ComputeNodeTree(instance.primary_node)
9129 for node, disk in edata:
9130 lu.cfg.SetDiskID(disk, node)
9131 result = lu.rpc.call_blockdev_remove(node, disk)
9133 lu.LogWarning("Could not remove disk %s on node %s,"
9134 " continuing anyway: %s", idx, node, result.fail_msg)
9135 if not (result.offline and node != instance.primary_node):
9138 # if this is a DRBD disk, return its port to the pool
9139 if device.dev_type in constants.LDS_DRBD:
9140 ports_to_release.add(device.logical_id[2])
9142 if all_result or ignore_failures:
9143 for port in ports_to_release:
9144 lu.cfg.AddTcpUdpPort(port)
9146 if instance.disk_template == constants.DT_FILE:
9147 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9151 tgt = instance.primary_node
9152 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9154 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9155 file_storage_dir, instance.primary_node, result.fail_msg)
9161 def _ComputeDiskSizePerVG(disk_template, disks):
9162 """Compute disk size requirements in the volume group
9165 def _compute(disks, payload):
9166 """Universal algorithm.
9171 vgs[disk[constants.IDISK_VG]] = \
9172 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9176 # Required free disk space as a function of disk and swap space
9178 constants.DT_DISKLESS: {},
9179 constants.DT_PLAIN: _compute(disks, 0),
9180 # 128 MB are added for drbd metadata for each disk
9181 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9182 constants.DT_FILE: {},
9183 constants.DT_SHARED_FILE: {},
9186 if disk_template not in req_size_dict:
9187 raise errors.ProgrammerError("Disk template '%s' size requirement"
9188 " is unknown" % disk_template)
9190 return req_size_dict[disk_template]
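# A small illustrative call (hypothetical values): for two DRBD8 disks of
# 1024 MiB and 2048 MiB, both in volume group "xenvg",
#   _ComputeDiskSizePerVG(constants.DT_DRBD8,
#                         [{constants.IDISK_VG: "xenvg",
#                           constants.IDISK_SIZE: 1024},
#                          {constants.IDISK_VG: "xenvg",
#                           constants.IDISK_SIZE: 2048}])
# yields {"xenvg": 1024 + 2048 + 2 * constants.DRBD_META_SIZE}, i.e. the
# data sizes plus the DRBD metadata overhead per disk.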
9193 def _FilterVmNodes(lu, nodenames):
9194 """Filters out non-vm_capable nodes from a list.
9196 @type lu: L{LogicalUnit}
9197 @param lu: the logical unit for which we check
9198 @type nodenames: list
9199 @param nodenames: the list of nodes on which we should check
9201 @return: the list of vm-capable nodes
9204 nonvm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9205 return [name for name in nodenames if name not in nonvm_nodes]
9208 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9209 """Hypervisor parameter validation.
9211 This function abstract the hypervisor parameter validation to be
9212 used in both instance create and instance modify.
9214 @type lu: L{LogicalUnit}
9215 @param lu: the logical unit for which we check
9216 @type nodenames: list
9217 @param nodenames: the list of nodes on which we should check
9218 @type hvname: string
9219 @param hvname: the name of the hypervisor we should use
9220 @type hvparams: dict
9221 @param hvparams: the parameters which we need to check
9222 @raise errors.OpPrereqError: if the parameters are not valid
9225 nodenames = _FilterVmNodes(lu, nodenames)
9227 cluster = lu.cfg.GetClusterInfo()
9228 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9230 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9231 for node in nodenames:
9235 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9238 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9239 """OS parameters validation.
9241 @type lu: L{LogicalUnit}
9242 @param lu: the logical unit for which we check
9243 @type required: boolean
9244 @param required: whether the validation should fail if the OS is not found
9246 @type nodenames: list
9247 @param nodenames: the list of nodes on which we should check
9248 @type osname: string
9249 @param osname: the name of the OS we should use
9250 @type osparams: dict
9251 @param osparams: the parameters which we need to check
9252 @raise errors.OpPrereqError: if the parameters are not valid
9255 nodenames = _FilterVmNodes(lu, nodenames)
9256 result = lu.rpc.call_os_validate(nodenames, required, osname,
9257 [constants.OS_VALIDATE_PARAMETERS],
9259 for node, nres in result.items():
9260 # we don't check for offline cases since this should be run only
9261 # against the master node and/or an instance's nodes
9262 nres.Raise("OS Parameters validation failed on node %s" % node)
9263 if not nres.payload:
9264 lu.LogInfo("OS %s not found on node %s, validation skipped",
9268 class LUInstanceCreate(LogicalUnit):
9269 """Create an instance.
9272 HPATH = "instance-add"
9273 HTYPE = constants.HTYPE_INSTANCE
9276 def CheckArguments(self):
9280 # do not require name_check to ease forward/backward compatibility
9282 if self.op.no_install and self.op.start:
9283 self.LogInfo("No-installation mode selected, disabling startup")
9284 self.op.start = False
9285 # validate/normalize the instance name
9286 self.op.instance_name = \
9287 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9289 if self.op.ip_check and not self.op.name_check:
9290 # TODO: make the ip check more flexible and not depend on the name check
9291 raise errors.OpPrereqError("Cannot do IP address check without a name"
9292 " check", errors.ECODE_INVAL)
9294 # check nics' parameter names
9295 for nic in self.op.nics:
9296 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9298 # check disks: parameter names and a consistent adopt/no-adopt strategy
9299 has_adopt = has_no_adopt = False
9300 for disk in self.op.disks:
9301 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9302 if constants.IDISK_ADOPT in disk:
9306 if has_adopt and has_no_adopt:
9307 raise errors.OpPrereqError("Either all disks are adopted or none is",
9310 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9311 raise errors.OpPrereqError("Disk adoption is not supported for the"
9312 " '%s' disk template" %
9313 self.op.disk_template,
9315 if self.op.iallocator is not None:
9316 raise errors.OpPrereqError("Disk adoption not allowed with an"
9317 " iallocator script", errors.ECODE_INVAL)
9318 if self.op.mode == constants.INSTANCE_IMPORT:
9319 raise errors.OpPrereqError("Disk adoption not allowed for"
9320 " instance import", errors.ECODE_INVAL)
9322 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9323 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9324 " but no 'adopt' parameter given" %
9325 self.op.disk_template,
9328 self.adopt_disks = has_adopt
9330 # instance name verification
9331 if self.op.name_check:
9332 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9333 self.op.instance_name = self.hostname1.name
9334 # used in CheckPrereq for ip ping check
9335 self.check_ip = self.hostname1.ip
9337 self.check_ip = None
9339 # file storage checks
9340 if (self.op.file_driver and
9341 not self.op.file_driver in constants.FILE_DRIVER):
9342 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9343 self.op.file_driver, errors.ECODE_INVAL)
9345 if self.op.disk_template == constants.DT_FILE:
9346 opcodes.RequireFileStorage()
9347 elif self.op.disk_template == constants.DT_SHARED_FILE:
9348 opcodes.RequireSharedFileStorage()
9350 ### Node/iallocator related checks
9351 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9353 if self.op.pnode is not None:
9354 if self.op.disk_template in constants.DTS_INT_MIRROR:
9355 if self.op.snode is None:
9356 raise errors.OpPrereqError("The networked disk templates need"
9357 " a mirror node", errors.ECODE_INVAL)
9359 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9361 self.op.snode = None
9363 self._cds = _GetClusterDomainSecret()
9365 if self.op.mode == constants.INSTANCE_IMPORT:
9366 # On import force_variant must be True, because if we forced it at
9367 # initial install, our only chance when importing it back is that it
9369 self.op.force_variant = True
9371 if self.op.no_install:
9372 self.LogInfo("No-installation mode has no effect during import")
9374 elif self.op.mode == constants.INSTANCE_CREATE:
9375 if self.op.os_type is None:
9376 raise errors.OpPrereqError("No guest OS specified",
9378 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9379 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9380 " installation" % self.op.os_type,
9382 if self.op.disk_template is None:
9383 raise errors.OpPrereqError("No disk template specified",
9386 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9387 # Check handshake to ensure both clusters have the same domain secret
9388 src_handshake = self.op.source_handshake
9389 if not src_handshake:
9390 raise errors.OpPrereqError("Missing source handshake",
9393 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9396 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9399 # Load and check source CA
9400 self.source_x509_ca_pem = self.op.source_x509_ca
9401 if not self.source_x509_ca_pem:
9402 raise errors.OpPrereqError("Missing source X509 CA",
9406 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9408 except OpenSSL.crypto.Error, err:
9409 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9410 (err, ), errors.ECODE_INVAL)
9412 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9413 if errcode is not None:
9414 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9417 self.source_x509_ca = cert
9419 src_instance_name = self.op.source_instance_name
9420 if not src_instance_name:
9421 raise errors.OpPrereqError("Missing source instance name",
9424 self.source_instance_name = \
9425 netutils.GetHostname(name=src_instance_name).name
9428 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9429 self.op.mode, errors.ECODE_INVAL)
9431 def ExpandNames(self):
9432 """ExpandNames for CreateInstance.
9434 Figure out the right locks for instance creation.
9437 self.needed_locks = {}
9439 instance_name = self.op.instance_name
9440 # this is just a preventive check, but someone might still add this
9441 # instance in the meantime, and creation will fail at lock-add time
9442 if instance_name in self.cfg.GetInstanceList():
9443 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9444 instance_name, errors.ECODE_EXISTS)
9446 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9448 if self.op.iallocator:
9449 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9450 # specifying a group on instance creation and then selecting nodes from
9452 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9453 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9455 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9456 nodelist = [self.op.pnode]
9457 if self.op.snode is not None:
9458 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9459 nodelist.append(self.op.snode)
9460 self.needed_locks[locking.LEVEL_NODE] = nodelist
9461 # Lock resources of instance's primary and secondary nodes (copy to
9462 # prevent accidental modification)
9463 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9465 # in case of import lock the source node too
9466 if self.op.mode == constants.INSTANCE_IMPORT:
9467 src_node = self.op.src_node
9468 src_path = self.op.src_path
9470 if src_path is None:
9471 self.op.src_path = src_path = self.op.instance_name
9473 if src_node is None:
9474 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9475 self.op.src_node = None
9476 if os.path.isabs(src_path):
9477 raise errors.OpPrereqError("Importing an instance from a path"
9478 " requires a source node option",
9481 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9482 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9483 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9484 if not os.path.isabs(src_path):
9485 self.op.src_path = src_path = \
9486 utils.PathJoin(constants.EXPORT_DIR, src_path)
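# Illustrative sketch (names are made up): for a DRBD8 creation with
# explicit nodes, ExpandNames leaves the lock declarations roughly as
#
#   self.add_locks == {locking.LEVEL_INSTANCE: "inst1.example.com"}
#   self.needed_locks == {locking.LEVEL_NODE: ["node1", "node2"],
#                         locking.LEVEL_NODE_RES: ["node1", "node2"]}
#
# whereas with an iallocator both node levels are locking.ALL_SET.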
9488 def _RunAllocator(self):
9489 """Run the allocator based on input opcode.
9492 nics = [n.ToDict() for n in self.nics]
9493 memory = self.be_full[constants.BE_MAXMEM]
9494 spindle_use = self.be_full[constants.BE_SPINDLE_USE]
9495 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
9496 disk_template=self.op.disk_template,
9499 vcpus=self.be_full[constants.BE_VCPUS],
9501 spindle_use=spindle_use,
9504 hypervisor=self.op.hypervisor)
9505 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9507 ial.Run(self.op.iallocator)
9510 raise errors.OpPrereqError("Can't compute nodes using"
9511 " iallocator '%s': %s" %
9512 (self.op.iallocator, ial.info),
9514 self.op.pnode = ial.result[0]
9515 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9516 self.op.instance_name, self.op.iallocator,
9517 utils.CommaJoin(ial.result))
9519 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9521 if req.RequiredNodes() == 2:
9522 self.op.snode = ial.result[1]
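# Illustrative sketch (node names are made up): for a two-node DRBD8
# allocation the iallocator result consumed above is an ordered list of
# node names, e.g.
#
#   ial.result --> ["node3.example.com", "node7.example.com"]
#
# where result[0] becomes the primary and result[1] the secondary node.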
9524 def BuildHooksEnv(self):
9527 This runs on master, primary and secondary nodes of the instance.
9531 "ADD_MODE": self.op.mode,
9533 if self.op.mode == constants.INSTANCE_IMPORT:
9534 env["SRC_NODE"] = self.op.src_node
9535 env["SRC_PATH"] = self.op.src_path
9536 env["SRC_IMAGES"] = self.src_images
9538 env.update(_BuildInstanceHookEnv(
9539 name=self.op.instance_name,
9540 primary_node=self.op.pnode,
9541 secondary_nodes=self.secondaries,
9542 status=self.op.start,
9543 os_type=self.op.os_type,
9544 minmem=self.be_full[constants.BE_MINMEM],
9545 maxmem=self.be_full[constants.BE_MAXMEM],
9546 vcpus=self.be_full[constants.BE_VCPUS],
9547 nics=_NICListToTuple(self, self.nics),
9548 disk_template=self.op.disk_template,
9549 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9550 for d in self.disks],
9553 hypervisor_name=self.op.hypervisor,
9559 def BuildHooksNodes(self):
9560 """Build hooks nodes.
9563 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9566 def _ReadExportInfo(self):
9567 """Reads the export information from disk.
9569 It will override the opcode source node and path with the actual
9570 information, if these two were not specified before.
9572 @return: the export information
9575 assert self.op.mode == constants.INSTANCE_IMPORT
9577 src_node = self.op.src_node
9578 src_path = self.op.src_path
9580 if src_node is None:
9581 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9582 exp_list = self.rpc.call_export_list(locked_nodes)
9584 for node in exp_list:
9585 if exp_list[node].fail_msg:
9587 if src_path in exp_list[node].payload:
9589 self.op.src_node = src_node = node
9590 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9594 raise errors.OpPrereqError("No export found for relative path %s" %
9595 src_path, errors.ECODE_INVAL)
9597 _CheckNodeOnline(self, src_node)
9598 result = self.rpc.call_export_info(src_node, src_path)
9599 result.Raise("No export or invalid export found in dir %s" % src_path)
9601 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9602 if not export_info.has_section(constants.INISECT_EXP):
9603 raise errors.ProgrammerError("Corrupted export config",
9604 errors.ECODE_ENVIRON)
9606 ei_version = export_info.get(constants.INISECT_EXP, "version")
9607 if (int(ei_version) != constants.EXPORT_VERSION):
9608 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9609 (ei_version, constants.EXPORT_VERSION),
9610 errors.ECODE_ENVIRON)
9613 def _ReadExportParams(self, einfo):
9614 """Use export parameters as defaults.
9616 In case the opcode doesn't specify (i.e. override) some instance
9617 parameters, try to use them from the export information, if the export declares them.
9621 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9623 if self.op.disk_template is None:
9624 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9625 self.op.disk_template = einfo.get(constants.INISECT_INS,
9627 if self.op.disk_template not in constants.DISK_TEMPLATES:
9628 raise errors.OpPrereqError("Disk template specified in configuration"
9629 " file is not one of the allowed values:"
9631 " ".join(constants.DISK_TEMPLATES),
9634 raise errors.OpPrereqError("No disk template specified and the export"
9635 " is missing the disk_template information",
9638 if not self.op.disks:
9640 # TODO: import the disk iv_name too
9641 for idx in range(constants.MAX_DISKS):
9642 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9643 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9644 disks.append({constants.IDISK_SIZE: disk_sz})
9645 self.op.disks = disks
9646 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9647 raise errors.OpPrereqError("No disk info specified and the export"
9648 " is missing the disk information",
9651 if not self.op.nics:
9653 for idx in range(constants.MAX_NICS):
9654 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9656 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9657 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9664 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9665 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9667 if (self.op.hypervisor is None and
9668 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9669 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9671 if einfo.has_section(constants.INISECT_HYP):
9672 # use the export parameters but do not override the ones
9673 # specified by the user
9674 for name, value in einfo.items(constants.INISECT_HYP):
9675 if name not in self.op.hvparams:
9676 self.op.hvparams[name] = value
9678 if einfo.has_section(constants.INISECT_BEP):
9679 # use the parameters, without overriding
9680 for name, value in einfo.items(constants.INISECT_BEP):
9681 if name not in self.op.beparams:
9682 self.op.beparams[name] = value
9683 # Compatibility for the old "memory" be param
9684 if name == constants.BE_MEMORY:
9685 if constants.BE_MAXMEM not in self.op.beparams:
9686 self.op.beparams[constants.BE_MAXMEM] = value
9687 if constants.BE_MINMEM not in self.op.beparams:
9688 self.op.beparams[constants.BE_MINMEM] = value
9690 # try to read the parameters old style, from the main section
9691 for name in constants.BES_PARAMETERS:
9692 if (name not in self.op.beparams and
9693 einfo.has_option(constants.INISECT_INS, name)):
9694 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
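# Illustrative sketch (value is made up): an old export that only
# carries the legacy "memory" backend parameter, say memory = 512,
# ends up via the compatibility branch above as roughly
#
#   self.op.beparams == {constants.BE_MEMORY: "512",
#                        constants.BE_MAXMEM: "512",
#                        constants.BE_MINMEM: "512"}
#
# unless the opcode already named maxmem/minmem explicitly;
# objects.UpgradeBeParams later normalizes the dict in CheckPrereq.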
9696 if einfo.has_section(constants.INISECT_OSP):
9697 # use the parameters, without overriding
9698 for name, value in einfo.items(constants.INISECT_OSP):
9699 if name not in self.op.osparams:
9700 self.op.osparams[name] = value
9702 def _RevertToDefaults(self, cluster):
9703 """Revert the instance parameters to the default values.
9707 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9708 for name in self.op.hvparams.keys():
9709 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9710 del self.op.hvparams[name]
9712 be_defs = cluster.SimpleFillBE({})
9713 for name in self.op.beparams.keys():
9714 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9715 del self.op.beparams[name]
9717 nic_defs = cluster.SimpleFillNIC({})
9718 for nic in self.op.nics:
9719 for name in constants.NICS_PARAMETERS:
9720 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9723 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9724 for name in self.op.osparams.keys():
9725 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9726 del self.op.osparams[name]
9728 def _CalculateFileStorageDir(self):
9729 """Calculate final instance file storage dir.
9732 # file storage dir calculation/check
9733 self.instance_file_storage_dir = None
9734 if self.op.disk_template in constants.DTS_FILEBASED:
9735 # build the full file storage dir path
9738 if self.op.disk_template == constants.DT_SHARED_FILE:
9739 get_fsd_fn = self.cfg.GetSharedFileStorageDir
else:
9741 get_fsd_fn = self.cfg.GetFileStorageDir
9743 cfg_storagedir = get_fsd_fn()
9744 if not cfg_storagedir:
9745 raise errors.OpPrereqError("Cluster file storage dir not defined",
9747 joinargs.append(cfg_storagedir)
9749 if self.op.file_storage_dir is not None:
9750 joinargs.append(self.op.file_storage_dir)
9752 joinargs.append(self.op.instance_name)
9754 # pylint: disable=W0142
9755 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
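# Illustrative sketch (paths are made up): with a cluster file storage
# dir of "/srv/ganeti/file-storage", an opcode file_storage_dir of
# "mysubdir" and instance "inst1.example.com", the join above produces
#
#   self.instance_file_storage_dir ==
#       "/srv/ganeti/file-storage/mysubdir/inst1.example.com"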
9757 def CheckPrereq(self): # pylint: disable=R0914
9758 """Check prerequisites.
9761 self._CalculateFileStorageDir()
9763 if self.op.mode == constants.INSTANCE_IMPORT:
9764 export_info = self._ReadExportInfo()
9765 self._ReadExportParams(export_info)
9766 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9768 self._old_instance_name = None
9770 if (not self.cfg.GetVGName() and
9771 self.op.disk_template not in constants.DTS_NOT_LVM):
9772 raise errors.OpPrereqError("Cluster does not support lvm-based"
9773 " instances", errors.ECODE_STATE)
9775 if (self.op.hypervisor is None or
9776 self.op.hypervisor == constants.VALUE_AUTO):
9777 self.op.hypervisor = self.cfg.GetHypervisorType()
9779 cluster = self.cfg.GetClusterInfo()
9780 enabled_hvs = cluster.enabled_hypervisors
9781 if self.op.hypervisor not in enabled_hvs:
9782 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9784 (self.op.hypervisor, ",".join(enabled_hvs)),
9787 # Check tag validity
9788 for tag in self.op.tags:
9789 objects.TaggableObject.ValidateTag(tag)
9791 # check hypervisor parameter syntax (locally)
9792 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9793 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9795 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9796 hv_type.CheckParameterSyntax(filled_hvp)
9797 self.hv_full = filled_hvp
9798 # check that we don't specify global parameters on an instance
9799 _CheckGlobalHvParams(self.op.hvparams)
9801 # fill and remember the beparams dict
9802 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9803 for param, value in self.op.beparams.iteritems():
9804 if value == constants.VALUE_AUTO:
9805 self.op.beparams[param] = default_beparams[param]
9806 objects.UpgradeBeParams(self.op.beparams)
9807 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9808 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9810 # build os parameters
9811 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9813 # now that hvp/bep are in final format, let's reset to defaults, if requested
9815 if self.op.identify_defaults:
9816 self._RevertToDefaults(cluster)
9820 for idx, nic in enumerate(self.op.nics):
9821 nic_mode_req = nic.get(constants.INIC_MODE, None)
9822 nic_mode = nic_mode_req
9823 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9824 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9826 # in routed mode, for the first nic, the default ip is 'auto'
9827 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9828 default_ip_mode = constants.VALUE_AUTO
9830 default_ip_mode = constants.VALUE_NONE
9832 # ip validity checks
9833 ip = nic.get(constants.INIC_IP, default_ip_mode)
9834 if ip is None or ip.lower() == constants.VALUE_NONE:
9836 elif ip.lower() == constants.VALUE_AUTO:
9837 if not self.op.name_check:
9838 raise errors.OpPrereqError("IP address set to auto but name checks"
9839 " have been skipped",
9841 nic_ip = self.hostname1.ip
9843 if not netutils.IPAddress.IsValid(ip):
9844 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9848 # TODO: check the ip address for uniqueness
9849 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9850 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9853 # MAC address verification
9854 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9855 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9856 mac = utils.NormalizeAndValidateMac(mac)
9859 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9860 except errors.ReservationError:
9861 raise errors.OpPrereqError("MAC address %s already in use"
9862 " in cluster" % mac,
9863 errors.ECODE_NOTUNIQUE)
9865 # Build nic parameters
9866 link = nic.get(constants.INIC_LINK, None)
9867 if link == constants.VALUE_AUTO:
9868 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9871 nicparams[constants.NIC_MODE] = nic_mode
9873 nicparams[constants.NIC_LINK] = link
9875 check_params = cluster.SimpleFillNIC(nicparams)
9876 objects.NIC.CheckParameterSyntax(check_params)
9877 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9879 # disk checks/pre-build
9880 default_vg = self.cfg.GetVGName()
9882 for disk in self.op.disks:
9883 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9884 if mode not in constants.DISK_ACCESS_SET:
9885 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9886 mode, errors.ECODE_INVAL)
9887 size = disk.get(constants.IDISK_SIZE, None)
if size is None:
9889 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
try:
size = int(size)
9892 except (TypeError, ValueError):
9893 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9896 data_vg = disk.get(constants.IDISK_VG, default_vg)
9898 constants.IDISK_SIZE: size,
9899 constants.IDISK_MODE: mode,
9900 constants.IDISK_VG: data_vg,
9902 if constants.IDISK_METAVG in disk:
9903 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9904 if constants.IDISK_ADOPT in disk:
9905 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9906 self.disks.append(new_disk)
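# Illustrative sketch (size is made up): a minimal user-supplied disk
# spec such as {constants.IDISK_SIZE: 10240} is normalized by the loop
# above into roughly
#
#   {constants.IDISK_SIZE: 10240,
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: default_vg}
#
# with IDISK_METAVG and IDISK_ADOPT copied through only when given.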
9908 if self.op.mode == constants.INSTANCE_IMPORT:
9910 for idx in range(len(self.disks)):
9911 option = "disk%d_dump" % idx
9912 if export_info.has_option(constants.INISECT_INS, option):
9913 # FIXME: are the old OSes, disk sizes, etc. useful?
9914 export_name = export_info.get(constants.INISECT_INS, option)
9915 image = utils.PathJoin(self.op.src_path, export_name)
9916 disk_images.append(image)
9918 disk_images.append(False)
9920 self.src_images = disk_images
9922 if self.op.instance_name == self._old_instance_name:
9923 for idx, nic in enumerate(self.nics):
9924 if nic.mac == constants.VALUE_AUTO:
9925 nic_mac_ini = "nic%d_mac" % idx
9926 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9928 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9930 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9931 if self.op.ip_check:
9932 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9933 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9934 (self.check_ip, self.op.instance_name),
9935 errors.ECODE_NOTUNIQUE)
9937 #### mac address generation
9938 # By generating here the mac address both the allocator and the hooks get
9939 # the real final mac address rather than the 'auto' or 'generate' value.
9940 # There is a race condition between the generation and the instance object
9941 # creation, which means that we know the mac is valid now, but we're not
9942 # sure it will be when we actually add the instance. If things go bad
9943 # adding the instance will abort because of a duplicate mac, and the
9944 # creation job will fail.
9945 for nic in self.nics:
9946 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9947 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9951 if self.op.iallocator is not None:
9952 self._RunAllocator()
9954 # Release all unneeded node locks
9955 _ReleaseLocks(self, locking.LEVEL_NODE,
9956 keep=filter(None, [self.op.pnode, self.op.snode,
9958 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9959 keep=filter(None, [self.op.pnode, self.op.snode,
9962 #### node related checks
9964 # check primary node
9965 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9966 assert self.pnode is not None, \
9967 "Cannot retrieve locked node %s" % self.op.pnode
if pnode.offline:
9969 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9970 pnode.name, errors.ECODE_STATE)
if pnode.drained:
9972 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9973 pnode.name, errors.ECODE_STATE)
9974 if not pnode.vm_capable:
9975 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9976 " '%s'" % pnode.name, errors.ECODE_STATE)
9978 self.secondaries = []
9980 # mirror node verification
9981 if self.op.disk_template in constants.DTS_INT_MIRROR:
9982 if self.op.snode == pnode.name:
9983 raise errors.OpPrereqError("The secondary node cannot be the"
9984 " primary node", errors.ECODE_INVAL)
9985 _CheckNodeOnline(self, self.op.snode)
9986 _CheckNodeNotDrained(self, self.op.snode)
9987 _CheckNodeVmCapable(self, self.op.snode)
9988 self.secondaries.append(self.op.snode)
9990 snode = self.cfg.GetNodeInfo(self.op.snode)
9991 if pnode.group != snode.group:
9992 self.LogWarning("The primary and secondary nodes are in two"
9993 " different node groups; the disk parameters"
9994 " from the first disk's node group will be"
9997 nodenames = [pnode.name] + self.secondaries
9999 # Verify instance specs
10000 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10002 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10003 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10004 constants.ISPEC_DISK_COUNT: len(self.disks),
10005 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10006 constants.ISPEC_NIC_COUNT: len(self.nics),
10007 constants.ISPEC_SPINDLE_USE: spindle_use,
10010 group_info = self.cfg.GetNodeGroup(pnode.group)
10011 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10012 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10013 if not self.op.ignore_ipolicy and res:
10014 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10015 " policy: %s") % (pnode.group,
10016 utils.CommaJoin(res)),
10017 errors.ECODE_INVAL)
10019 if not self.adopt_disks:
10020 if self.op.disk_template == constants.DT_RBD:
10021 # _CheckRADOSFreeSpace() is just a placeholder.
10022 # Any function that checks prerequisites can be placed here.
10023 # Check if there is enough space on the RADOS cluster.
10024 _CheckRADOSFreeSpace()
10026 # Check lv size requirements, if not adopting
10027 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10028 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10030 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10031 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10032 disk[constants.IDISK_ADOPT])
10033 for disk in self.disks])
10034 if len(all_lvs) != len(self.disks):
10035 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10036 errors.ECODE_INVAL)
10037 for lv_name in all_lvs:
10039 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10040 # to ReserveLV use the same syntax
10041 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10042 except errors.ReservationError:
10043 raise errors.OpPrereqError("LV named %s used by another instance" %
10044 lv_name, errors.ECODE_NOTUNIQUE)
10046 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10047 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10049 node_lvs = self.rpc.call_lv_list([pnode.name],
10050 vg_names.payload.keys())[pnode.name]
10051 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10052 node_lvs = node_lvs.payload
10054 delta = all_lvs.difference(node_lvs.keys())
10056 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10057 utils.CommaJoin(delta),
10058 errors.ECODE_INVAL)
10059 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10061 raise errors.OpPrereqError("Online logical volumes found, cannot"
10062 " adopt: %s" % utils.CommaJoin(online_lvs),
10063 errors.ECODE_STATE)
10064 # update the size of each disk based on what was found
10065 for dsk in self.disks:
10066 dsk[constants.IDISK_SIZE] = \
10067 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10068 dsk[constants.IDISK_ADOPT])][0]))
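# Illustrative note (the exact tuple layout is assumed from the
# indexing above): the lv_list payload maps "vg/lv_name" to a tuple
# whose first element is the size in MiB and whose third marks the LV
# as online, e.g.
#
#   node_lvs == {"xenvg/myvolume": (20480.0, False, False)}
#
# so adopting "xenvg/myvolume" sets that disk's IDISK_SIZE to 20480.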
10070 elif self.op.disk_template == constants.DT_BLOCK:
10071 # Normalize and de-duplicate device paths
10072 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10073 for disk in self.disks])
10074 if len(all_disks) != len(self.disks):
10075 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10076 errors.ECODE_INVAL)
10077 baddisks = [d for d in all_disks
10078 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10080 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10081 " cannot be adopted" %
10082 (", ".join(baddisks),
10083 constants.ADOPTABLE_BLOCKDEV_ROOT),
10084 errors.ECODE_INVAL)
10086 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10087 list(all_disks))[pnode.name]
10088 node_disks.Raise("Cannot get block device information from node %s" %
10090 node_disks = node_disks.payload
10091 delta = all_disks.difference(node_disks.keys())
10093 raise errors.OpPrereqError("Missing block device(s): %s" %
10094 utils.CommaJoin(delta),
10095 errors.ECODE_INVAL)
10096 for dsk in self.disks:
10097 dsk[constants.IDISK_SIZE] = \
10098 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10100 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10102 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10103 # check OS parameters (remotely)
10104 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10106 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10108 # memory check on primary node
10109 #TODO(dynmem): use MINMEM for checking
10111 _CheckNodeFreeMemory(self, self.pnode.name,
10112 "creating instance %s" % self.op.instance_name,
10113 self.be_full[constants.BE_MAXMEM],
10114 self.op.hypervisor)
10116 self.dry_run_result = list(nodenames)
10118 def Exec(self, feedback_fn):
10119 """Create and add the instance to the cluster.
10122 instance = self.op.instance_name
10123 pnode_name = self.pnode.name
10125 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10126 self.owned_locks(locking.LEVEL_NODE)), \
10127 "Node locks differ from node resource locks"
10129 ht_kind = self.op.hypervisor
10130 if ht_kind in constants.HTS_REQ_PORT:
10131 network_port = self.cfg.AllocatePort()
10133 network_port = None
10135 # This is ugly, but we have a chicken-and-egg problem here:
10136 # We can only take the group disk parameters, as the instance
10137 # has no disks yet (we are generating them right here).
10138 node = self.cfg.GetNodeInfo(pnode_name)
10139 nodegroup = self.cfg.GetNodeGroup(node.group)
10140 disks = _GenerateDiskTemplate(self,
10141 self.op.disk_template,
10142 instance, pnode_name,
10145 self.instance_file_storage_dir,
10146 self.op.file_driver,
10149 self.cfg.GetGroupDiskParams(nodegroup))
10151 iobj = objects.Instance(name=instance, os=self.op.os_type,
10152 primary_node=pnode_name,
10153 nics=self.nics, disks=disks,
10154 disk_template=self.op.disk_template,
10155 admin_state=constants.ADMINST_DOWN,
10156 network_port=network_port,
10157 beparams=self.op.beparams,
10158 hvparams=self.op.hvparams,
10159 hypervisor=self.op.hypervisor,
10160 osparams=self.op.osparams,
10164 for tag in self.op.tags:
10167 if self.adopt_disks:
10168 if self.op.disk_template == constants.DT_PLAIN:
10169 # rename LVs to the newly-generated names; we need to construct
10170 # 'fake' LV disks with the old data, plus the new unique_id
10171 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10173 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10174 rename_to.append(t_dsk.logical_id)
10175 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10176 self.cfg.SetDiskID(t_dsk, pnode_name)
10177 result = self.rpc.call_blockdev_rename(pnode_name,
10178 zip(tmp_disks, rename_to))
10179 result.Raise("Failed to rename adopted LVs")
10181 feedback_fn("* creating instance disks...")
10183 _CreateDisks(self, iobj)
10184 except errors.OpExecError:
10185 self.LogWarning("Device creation failed, reverting...")
10187 _RemoveDisks(self, iobj)
10189 self.cfg.ReleaseDRBDMinors(instance)
10192 feedback_fn("adding instance %s to cluster config" % instance)
10194 self.cfg.AddInstance(iobj, self.proc.GetECId())
10196 # Declare that we don't want to remove the instance lock anymore, as we've
10197 # added the instance to the config
10198 del self.remove_locks[locking.LEVEL_INSTANCE]
10200 if self.op.mode == constants.INSTANCE_IMPORT:
10201 # Release unused nodes
10202 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10204 # Release all nodes
10205 _ReleaseLocks(self, locking.LEVEL_NODE)
10208 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10209 feedback_fn("* wiping instance disks...")
10211 _WipeDisks(self, iobj)
10212 except errors.OpExecError, err:
10213 logging.exception("Wiping disks failed")
10214 self.LogWarning("Wiping instance disks failed (%s)", err)
10218 # Something is already wrong with the disks, don't do anything else
10220 elif self.op.wait_for_sync:
10221 disk_abort = not _WaitForSync(self, iobj)
10222 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10223 # make sure the disks are not degraded (still sync-ing is ok)
10224 feedback_fn("* checking mirrors status")
10225 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10230 _RemoveDisks(self, iobj)
10231 self.cfg.RemoveInstance(iobj.name)
10232 # Make sure the instance lock gets removed
10233 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10234 raise errors.OpExecError("There are some degraded disks for"
10237 # Release all node resource locks
10238 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10240 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10241 # we need to set the disk IDs to the primary node, since the
10242 # preceding code might or might not have done it, depending on
10243 # disk template and other options
10244 for disk in iobj.disks:
10245 self.cfg.SetDiskID(disk, pnode_name)
10246 if self.op.mode == constants.INSTANCE_CREATE:
10247 if not self.op.no_install:
10248 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10249 not self.op.wait_for_sync)
10251 feedback_fn("* pausing disk sync to install instance OS")
10252 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10255 for idx, success in enumerate(result.payload):
10257 logging.warn("pause-sync of instance %s for disk %d failed",
10260 feedback_fn("* running the instance OS create scripts...")
10261 # FIXME: pass debug option from opcode to backend
10263 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10264 self.op.debug_level)
10266 feedback_fn("* resuming disk sync")
10267 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10270 for idx, success in enumerate(result.payload):
10272 logging.warn("resume-sync of instance %s for disk %d failed",
10275 os_add_result.Raise("Could not add os for instance %s"
10276 " on node %s" % (instance, pnode_name))
10279 if self.op.mode == constants.INSTANCE_IMPORT:
10280 feedback_fn("* running the instance OS import scripts...")
10284 for idx, image in enumerate(self.src_images):
10288 # FIXME: pass debug option from opcode to backend
10289 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10290 constants.IEIO_FILE, (image, ),
10291 constants.IEIO_SCRIPT,
10292 (iobj.disks[idx], idx),
10294 transfers.append(dt)
10297 masterd.instance.TransferInstanceData(self, feedback_fn,
10298 self.op.src_node, pnode_name,
10299 self.pnode.secondary_ip,
10301 if not compat.all(import_result):
10302 self.LogWarning("Some disks for instance %s on node %s were not"
10303 " imported successfully" % (instance, pnode_name))
10305 rename_from = self._old_instance_name
10307 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10308 feedback_fn("* preparing remote import...")
10309 # The source cluster will stop the instance before attempting to make
10310 # a connection. In some cases stopping an instance can take a long
10311 # time, hence the shutdown timeout is added to the connection timeout.
10313 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10314 self.op.source_shutdown_timeout)
10315 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10317 assert iobj.primary_node == self.pnode.name
10319 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10320 self.source_x509_ca,
10321 self._cds, timeouts)
10322 if not compat.all(disk_results):
10323 # TODO: Should the instance still be started, even if some disks
10324 # failed to import (valid for local imports, too)?
10325 self.LogWarning("Some disks for instance %s on node %s were not"
10326 " imported successfully" % (instance, pnode_name))
10328 rename_from = self.source_instance_name
10331 # also checked in the prereq part
10332 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10335 # Run rename script on newly imported instance
10336 assert iobj.name == instance
10337 feedback_fn("Running rename script for %s" % instance)
10338 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10340 self.op.debug_level)
10341 if result.fail_msg:
10342 self.LogWarning("Failed to run rename script for %s on node"
10343 " %s: %s" % (instance, pnode_name, result.fail_msg))
10345 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10348 iobj.admin_state = constants.ADMINST_UP
10349 self.cfg.Update(iobj, feedback_fn)
10350 logging.info("Starting instance %s on node %s", instance, pnode_name)
10351 feedback_fn("* starting instance...")
10352 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10354 result.Raise("Could not start instance")
10356 return list(iobj.all_nodes)
10359 def _CheckRADOSFreeSpace():
10360 """Compute disk size requirements inside the RADOS cluster.
10363 # For the RADOS cluster we assume there is always enough space.
10367 class LUInstanceConsole(NoHooksLU):
10368 """Connect to an instance's console.
10370 This is somewhat special in that it returns the command line that
10371 you need to run on the master node in order to connect to the console.
10377 def ExpandNames(self):
10378 self.share_locks = _ShareAll()
10379 self._ExpandAndLockInstance()
10381 def CheckPrereq(self):
10382 """Check prerequisites.
10384 This checks that the instance is in the cluster.
10387 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10388 assert self.instance is not None, \
10389 "Cannot retrieve locked instance %s" % self.op.instance_name
10390 _CheckNodeOnline(self, self.instance.primary_node)
10392 def Exec(self, feedback_fn):
10393 """Connect to the console of an instance
10396 instance = self.instance
10397 node = instance.primary_node
10399 node_insts = self.rpc.call_instance_list([node],
10400 [instance.hypervisor])[node]
10401 node_insts.Raise("Can't get node information from %s" % node)
10403 if instance.name not in node_insts.payload:
10404 if instance.admin_state == constants.ADMINST_UP:
10405 state = constants.INSTST_ERRORDOWN
10406 elif instance.admin_state == constants.ADMINST_DOWN:
10407 state = constants.INSTST_ADMINDOWN
10409 state = constants.INSTST_ADMINOFFLINE
10410 raise errors.OpExecError("Instance %s is not running (state %s)" %
10411 (instance.name, state))
10413 logging.debug("Connecting to console of %s on %s", instance.name, node)
10415 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10418 def _GetInstanceConsole(cluster, instance):
10419 """Returns console information for an instance.
10421 @type cluster: L{objects.Cluster}
10422 @type instance: L{objects.Instance}
10426 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10427 # beparams and hvparams are passed separately, to avoid editing the
10428 # instance and then saving the defaults in the instance itself.
10429 hvparams = cluster.FillHV(instance)
10430 beparams = cluster.FillBE(instance)
10431 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10433 assert console.instance == instance.name
10434 assert console.Validate()
10436 return console.ToDict()
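# Illustrative sketch (the exact fields depend on the hypervisor and
# are assumed here): the dict returned above might look like
#
#   {"instance": "inst1.example.com", "kind": constants.CONS_SSH,
#    "host": "node1.example.com", "user": "root",
#    "command": ["xm", "console", "inst1.example.com"]}
#
# i.e. enough information for the client to build the command line
# mentioned in the LUInstanceConsole docstring.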
10439 class LUInstanceReplaceDisks(LogicalUnit):
10440 """Replace the disks of an instance.
10443 HPATH = "mirrors-replace"
10444 HTYPE = constants.HTYPE_INSTANCE
10447 def CheckArguments(self):
10448 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10449 self.op.iallocator)
10451 def ExpandNames(self):
10452 self._ExpandAndLockInstance()
10454 assert locking.LEVEL_NODE not in self.needed_locks
10455 assert locking.LEVEL_NODE_RES not in self.needed_locks
10456 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10458 assert self.op.iallocator is None or self.op.remote_node is None, \
10459 "Conflicting options"
10461 if self.op.remote_node is not None:
10462 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10464 # Warning: do not remove the locking of the new secondary here
10465 # unless DRBD8.AddChildren is changed to work in parallel;
10466 # currently it doesn't since parallel invocations of
10467 # FindUnusedMinor will conflict
10468 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10469 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10471 self.needed_locks[locking.LEVEL_NODE] = []
10472 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10474 if self.op.iallocator is not None:
10475 # iallocator will select a new node in the same group
10476 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10478 self.needed_locks[locking.LEVEL_NODE_RES] = []
10480 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10481 self.op.iallocator, self.op.remote_node,
10482 self.op.disks, False, self.op.early_release,
10483 self.op.ignore_ipolicy)
10485 self.tasklets = [self.replacer]
10487 def DeclareLocks(self, level):
10488 if level == locking.LEVEL_NODEGROUP:
10489 assert self.op.remote_node is None
10490 assert self.op.iallocator is not None
10491 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10493 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10494 # Lock all groups used by instance optimistically; this requires going
10495 # via the node before it's locked, requiring verification later on
10496 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10497 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10499 elif level == locking.LEVEL_NODE:
10500 if self.op.iallocator is not None:
10501 assert self.op.remote_node is None
10502 assert not self.needed_locks[locking.LEVEL_NODE]
10504 # Lock member nodes of all locked groups
10505 self.needed_locks[locking.LEVEL_NODE] = \
10507 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10508 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10510 self._LockInstancesNodes()
10511 elif level == locking.LEVEL_NODE_RES:
10513 self.needed_locks[locking.LEVEL_NODE_RES] = \
10514 self.needed_locks[locking.LEVEL_NODE]
10516 def BuildHooksEnv(self):
10517 """Build hooks env.
10519 This runs on the master, the primary and all the secondaries.
10522 instance = self.replacer.instance
10524 "MODE": self.op.mode,
10525 "NEW_SECONDARY": self.op.remote_node,
10526 "OLD_SECONDARY": instance.secondary_nodes[0],
10528 env.update(_BuildInstanceHookEnvByObject(self, instance))
10531 def BuildHooksNodes(self):
10532 """Build hooks nodes.
10535 instance = self.replacer.instance
10537 self.cfg.GetMasterNode(),
10538 instance.primary_node,
10540 if self.op.remote_node is not None:
10541 nl.append(self.op.remote_node)
10544 def CheckPrereq(self):
10545 """Check prerequisites.
10548 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10549 self.op.iallocator is None)
10551 # Verify if node group locks are still correct
10552 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10554 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10556 return LogicalUnit.CheckPrereq(self)
10559 class TLReplaceDisks(Tasklet):
10560 """Replaces disks for an instance.
10562 Note: Locking is not within the scope of this class.
10565 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10566 disks, delay_iallocator, early_release, ignore_ipolicy):
10567 """Initializes this class.
10570 Tasklet.__init__(self, lu)
10573 self.instance_name = instance_name
10575 self.iallocator_name = iallocator_name
10576 self.remote_node = remote_node
10578 self.delay_iallocator = delay_iallocator
10579 self.early_release = early_release
10580 self.ignore_ipolicy = ignore_ipolicy
10583 self.instance = None
10584 self.new_node = None
10585 self.target_node = None
10586 self.other_node = None
10587 self.remote_node_info = None
10588 self.node_secondary_ip = None
10591 def CheckArguments(mode, remote_node, ialloc):
10592 """Helper function for users of this class.
10595 # check for valid parameter combination
10596 if mode == constants.REPLACE_DISK_CHG:
10597 if remote_node is None and ialloc is None:
10598 raise errors.OpPrereqError("When changing the secondary either an"
10599 " iallocator script must be used or the"
10600 " new node given", errors.ECODE_INVAL)
10602 if remote_node is not None and ialloc is not None:
10603 raise errors.OpPrereqError("Give either the iallocator or the new"
10604 " secondary, not both", errors.ECODE_INVAL)
10606 elif remote_node is not None or ialloc is not None:
10607 # Not replacing the secondary
10608 raise errors.OpPrereqError("The iallocator and new node options can"
10609 " only be used when changing the"
10610 " secondary node", errors.ECODE_INVAL)
10613 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10614 """Compute a new secondary node using an IAllocator.
10617 req = iallocator.IAReqRelocate(name=instance_name,
10618 relocate_from=list(relocate_from))
10619 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10621 ial.Run(iallocator_name)
10623 if not ial.success:
10624 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10625 " %s" % (iallocator_name, ial.info),
10626 errors.ECODE_NORES)
10628 remote_node_name = ial.result[0]
10630 lu.LogInfo("Selected new secondary for instance '%s': %s",
10631 instance_name, remote_node_name)
10633 return remote_node_name
10635 def _FindFaultyDisks(self, node_name):
10636 """Wrapper for L{_FindFaultyInstanceDisks}.
10639 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10642 def _CheckDisksActivated(self, instance):
10643 """Checks if the instance disks are activated.
10645 @param instance: The instance to check disks
10646 @return: True if they are activated, False otherwise
10649 nodes = instance.all_nodes
10651 for idx, dev in enumerate(instance.disks):
10653 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10654 self.cfg.SetDiskID(dev, node)
10656 result = _BlockdevFind(self, node, dev, instance)
10660 elif result.fail_msg or not result.payload:
10665 def CheckPrereq(self):
10666 """Check prerequisites.
10668 This checks that the instance is in the cluster.
10671 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10672 assert instance is not None, \
10673 "Cannot retrieve locked instance %s" % self.instance_name
10675 if instance.disk_template != constants.DT_DRBD8:
10676 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10677 " instances", errors.ECODE_INVAL)
10679 if len(instance.secondary_nodes) != 1:
10680 raise errors.OpPrereqError("The instance has a strange layout,"
10681 " expected one secondary but found %d" %
10682 len(instance.secondary_nodes),
10683 errors.ECODE_FAULT)
10685 if not self.delay_iallocator:
10686 self._CheckPrereq2()
10688 def _CheckPrereq2(self):
10689 """Check prerequisites, second part.
10691 This function should always be part of CheckPrereq. It was separated and is
10692 now called from Exec because during node evacuation iallocator was only
10693 called with an unmodified cluster model, not taking planned changes into account.
10697 instance = self.instance
10698 secondary_node = instance.secondary_nodes[0]
10700 if self.iallocator_name is None:
10701 remote_node = self.remote_node
10703 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10704 instance.name, instance.secondary_nodes)
10706 if remote_node is None:
10707 self.remote_node_info = None
10709 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10710 "Remote node '%s' is not locked" % remote_node
10712 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10713 assert self.remote_node_info is not None, \
10714 "Cannot retrieve locked node %s" % remote_node
10716 if remote_node == self.instance.primary_node:
10717 raise errors.OpPrereqError("The specified node is the primary node of"
10718 " the instance", errors.ECODE_INVAL)
10720 if remote_node == secondary_node:
10721 raise errors.OpPrereqError("The specified node is already the"
10722 " secondary node of the instance",
10723 errors.ECODE_INVAL)
10725 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10726 constants.REPLACE_DISK_CHG):
10727 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10728 errors.ECODE_INVAL)
10730 if self.mode == constants.REPLACE_DISK_AUTO:
10731 if not self._CheckDisksActivated(instance):
10732 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10733 " first" % self.instance_name,
10734 errors.ECODE_STATE)
10735 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10736 faulty_secondary = self._FindFaultyDisks(secondary_node)
10738 if faulty_primary and faulty_secondary:
10739 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10740 " one node and can not be repaired"
10741 " automatically" % self.instance_name,
10742 errors.ECODE_STATE)
10745 self.disks = faulty_primary
10746 self.target_node = instance.primary_node
10747 self.other_node = secondary_node
10748 check_nodes = [self.target_node, self.other_node]
10749 elif faulty_secondary:
10750 self.disks = faulty_secondary
10751 self.target_node = secondary_node
10752 self.other_node = instance.primary_node
10753 check_nodes = [self.target_node, self.other_node]
10759 # Non-automatic modes
10760 if self.mode == constants.REPLACE_DISK_PRI:
10761 self.target_node = instance.primary_node
10762 self.other_node = secondary_node
10763 check_nodes = [self.target_node, self.other_node]
10765 elif self.mode == constants.REPLACE_DISK_SEC:
10766 self.target_node = secondary_node
10767 self.other_node = instance.primary_node
10768 check_nodes = [self.target_node, self.other_node]
10770 elif self.mode == constants.REPLACE_DISK_CHG:
10771 self.new_node = remote_node
10772 self.other_node = instance.primary_node
10773 self.target_node = secondary_node
10774 check_nodes = [self.new_node, self.other_node]
10776 _CheckNodeNotDrained(self.lu, remote_node)
10777 _CheckNodeVmCapable(self.lu, remote_node)
10779 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10780 assert old_node_info is not None
10781 if old_node_info.offline and not self.early_release:
10782 # doesn't make sense to delay the release
10783 self.early_release = True
10784 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10785 " early-release mode", secondary_node)
10788 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10791 # If not specified all disks should be replaced
10793 self.disks = range(len(self.instance.disks))
10795 # TODO: This is ugly, but right now we can't distinguish between internally
10796 # submitted opcodes and external ones. We should fix that.
10797 if self.remote_node_info:
10798 # We change the node; let's verify it still meets the instance policy
10799 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10800 cluster = self.cfg.GetClusterInfo()
10801 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
10803 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10804 ignore=self.ignore_ipolicy)
10806 for node in check_nodes:
10807 _CheckNodeOnline(self.lu, node)
10809 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10812 if node_name is not None)
10814 # Release unneeded node and node resource locks
10815 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10816 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10818 # Release any owned node group
10819 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10820 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10822 # Check whether disks are valid
10823 for disk_idx in self.disks:
10824 instance.FindDisk(disk_idx)
10826 # Get secondary node IP addresses
10827 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10828 in self.cfg.GetMultiNodeInfo(touched_nodes))
10830 def Exec(self, feedback_fn):
10831 """Execute disk replacement.
10833 This dispatches the disk replacement to the appropriate handler.
10836 if self.delay_iallocator:
10837 self._CheckPrereq2()
10840 # Verify owned locks before starting operation
10841 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10842 assert set(owned_nodes) == set(self.node_secondary_ip), \
10843 ("Incorrect node locks, owning %s, expected %s" %
10844 (owned_nodes, self.node_secondary_ip.keys()))
10845 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10846 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10848 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10849 assert list(owned_instances) == [self.instance_name], \
10850 "Instance '%s' not locked" % self.instance_name
10852 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10853 "Should not own any node group lock at this point"
10856 feedback_fn("No disks need replacement")
10859 feedback_fn("Replacing disk(s) %s for %s" %
10860 (utils.CommaJoin(self.disks), self.instance.name))
10862 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10864 # Activate the instance disks if we're replacing them on a down instance
if activate_disks:
10866 _StartInstanceDisks(self.lu, self.instance, True)
10869 # Should we replace the secondary node?
10870 if self.new_node is not None:
10871 fn = self._ExecDrbd8Secondary
else:
10873 fn = self._ExecDrbd8DiskOnly
10875 result = fn(feedback_fn)
10877 # Deactivate the instance disks if we're replacing them on a down instance
if activate_disks:
10880 _SafeShutdownInstanceDisks(self.lu, self.instance)
10882 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10885 # Verify owned locks
10886 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10887 nodes = frozenset(self.node_secondary_ip)
10888 assert ((self.early_release and not owned_nodes) or
10889 (not self.early_release and not (set(owned_nodes) - nodes))), \
10890 ("Not owning the correct locks, early_release=%s, owned=%r,"
10891 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10895 def _CheckVolumeGroup(self, nodes):
10896 self.lu.LogInfo("Checking volume groups")
10898 vgname = self.cfg.GetVGName()
10900 # Make sure volume group exists on all involved nodes
10901 results = self.rpc.call_vg_list(nodes)
10903 raise errors.OpExecError("Can't list volume groups on the nodes")
10906 res = results[node]
10907 res.Raise("Error checking node %s" % node)
10908 if vgname not in res.payload:
10909 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10912 def _CheckDisksExistence(self, nodes):
10913 # Check disk existence
10914 for idx, dev in enumerate(self.instance.disks):
10915 if idx not in self.disks:
10919 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10920 self.cfg.SetDiskID(dev, node)
10922 result = _BlockdevFind(self, node, dev, self.instance)
10924 msg = result.fail_msg
10925 if msg or not result.payload:
10927 msg = "disk not found"
10928 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10931 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10932 for idx, dev in enumerate(self.instance.disks):
10933 if idx not in self.disks:
10936 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10939 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10940 on_primary, ldisk=ldisk):
10941 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10942 " replace disks for instance %s" %
10943 (node_name, self.instance.name))
10945 def _CreateNewStorage(self, node_name):
10946 """Create new storage on the primary or secondary node.
10948 This is only used for same-node replaces, not for changing the
10949 secondary node, hence we don't want to modify the existing disk.
10954 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10955 for idx, dev in enumerate(disks):
10956 if idx not in self.disks:
10959 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10961 self.cfg.SetDiskID(dev, node_name)
10963 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10964 names = _GenerateUniqueNames(self.lu, lv_names)
10966 (data_disk, meta_disk) = dev.children
10967 vg_data = data_disk.logical_id[0]
10968 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10969 logical_id=(vg_data, names[0]),
10970 params=data_disk.params)
10971 vg_meta = meta_disk.logical_id[0]
10972 lv_meta = objects.Disk(dev_type=constants.LD_LV,
10973 size=constants.DRBD_META_SIZE,
10974 logical_id=(vg_meta, names[1]),
10975 params=meta_disk.params)
10977 new_lvs = [lv_data, lv_meta]
10978 old_lvs = [child.Copy() for child in dev.children]
10979 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10981 # we pass force_create=True to force the LVM creation
10982 for new_lv in new_lvs:
10983 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10984 _GetInstanceInfoText(self.instance), False)
return iv_names
10988 def _CheckDevices(self, node_name, iv_names):
10989 for name, (dev, _, _) in iv_names.iteritems():
10990 self.cfg.SetDiskID(dev, node_name)
10992 result = _BlockdevFind(self, node_name, dev, self.instance)
10994 msg = result.fail_msg
10995 if msg or not result.payload:
if not msg:
10997 msg = "disk not found"
10998 raise errors.OpExecError("Can't find DRBD device %s: %s" %
(name, msg))
11001 if result.payload.is_degraded:
11002 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11004 def _RemoveOldStorage(self, node_name, iv_names):
11005 for name, (_, old_lvs, _) in iv_names.iteritems():
11006 self.lu.LogInfo("Remove logical volumes for %s" % name)
for lv in old_lvs:
11009 self.cfg.SetDiskID(lv, node_name)
11011 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
if msg:
11013 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11014 hint="remove unused LVs manually")
11016 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11017 """Replace a disk on the primary or secondary for DRBD 8.
11019 The algorithm for replace is quite complicated:
11021 1. for each disk to be replaced:
11023 1. create new LVs on the target node with unique names
11024 1. detach old LVs from the drbd device
11025 1. rename old LVs to name_replaced.<time_t>
11026 1. rename new LVs to old LVs
11027 1. attach the new LVs (with the old names now) to the drbd device
11029 1. wait for sync across all devices
11031 1. for each modified disk:
11033 1. remove old LVs (which have the name name_replaced.<time_t>)
11035 Failures are not very well handled.

"""
steps_total = 6
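# Rough sketch of the per-disk swap performed below (assuming a disk
# "disk/0" backed by LVs "disk0_data"/"disk0_meta" on the target node;
# names are illustrative only):
#
#   drbd0 -- detach --> [disk0_data, disk0_meta]       (removechildren)
#   disk0_data           -> disk0_data_replaced-<t>    (rename old LVs)
#   <new unique LV name>  -> disk0_data                (rename new LVs)
#   drbd0 -- attach --> [disk0_data, disk0_meta]       (addchildren)
#
# After all disks are processed the DRBD devices resync and the
# "*_replaced-<t>" LVs are deleted (immediately with early release,
# otherwise after the sync completes).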
11040 # Step: check device activation
11041 self.lu.LogStep(1, steps_total, "Check device existence")
11042 self._CheckDisksExistence([self.other_node, self.target_node])
11043 self._CheckVolumeGroup([self.target_node, self.other_node])
11045 # Step: check other node consistency
11046 self.lu.LogStep(2, steps_total, "Check peer consistency")
11047 self._CheckDisksConsistency(self.other_node,
11048 self.other_node == self.instance.primary_node,
False)
11051 # Step: create new storage
11052 self.lu.LogStep(3, steps_total, "Allocate new storage")
11053 iv_names = self._CreateNewStorage(self.target_node)
11055 # Step: for each lv, detach+rename*2+attach
11056 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11057 for dev, old_lvs, new_lvs in iv_names.itervalues():
11058 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11060 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
old_lvs)
11062 result.Raise("Can't detach drbd from local storage on node"
11063 " %s for device %s" % (self.target_node, dev.iv_name))
11065 #cfg.Update(instance)
11067 # ok, we created the new LVs, so now we know we have the needed
11068 # storage; as such, we proceed on the target node to rename
11069 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11070 # using the assumption that logical_id == physical_id (which in
11071 # turn is the unique_id on that node)
11073 # FIXME(iustin): use a better name for the replaced LVs
11074 temp_suffix = int(time.time())
11075 ren_fn = lambda d, suff: (d.physical_id[0],
11076 d.physical_id[1] + "_replaced-%s" % suff)
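# For example (hypothetical names), with temp_suffix == 1234567890 the
# mapping built below turns
#   ('xenvg', 'disk0_data')  into  ('xenvg', 'disk0_data_replaced-1234567890')
# i.e. only the LV name changes, the volume group stays the same.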
11078 # Build the rename list based on what LVs exist on the node
11079 rename_old_to_new = []
11080 for to_ren in old_lvs:
11081 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11082 if not result.fail_msg and result.payload:
11084 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11086 self.lu.LogInfo("Renaming the old LVs on the target node")
11087 result = self.rpc.call_blockdev_rename(self.target_node,
rename_old_to_new)
11089 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11091 # Now we rename the new LVs to the old LVs
11092 self.lu.LogInfo("Renaming the new LVs on the target node")
11093 rename_new_to_old = [(new, old.physical_id)
11094 for old, new in zip(old_lvs, new_lvs)]
11095 result = self.rpc.call_blockdev_rename(self.target_node,
rename_new_to_old)
11097 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11099 # Intermediate steps of in memory modifications
11100 for old, new in zip(old_lvs, new_lvs):
11101 new.logical_id = old.logical_id
11102 self.cfg.SetDiskID(new, self.target_node)
11104 # We need to modify old_lvs so that removal later removes the
11105 # right LVs, not the newly added ones; note that old_lvs is a
11107 for disk in old_lvs:
11108 disk.logical_id = ren_fn(disk, temp_suffix)
11109 self.cfg.SetDiskID(disk, self.target_node)
11111 # Now that the new lvs have the old name, we can add them to the device
11112 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11113 result = self.rpc.call_blockdev_addchildren(self.target_node,
11114 (dev, self.instance), new_lvs)
11115 msg = result.fail_msg
if msg:
11117 for new_lv in new_lvs:
11118 msg2 = self.rpc.call_blockdev_remove(self.target_node,
new_lv).fail_msg
if msg2:
11121 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11122 hint=("cleanup manually the unused logical"
" volumes"))
11124 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11126 cstep = itertools.count(5)
11128 if self.early_release:
11129 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11130 self._RemoveOldStorage(self.target_node, iv_names)
11131 # TODO: Check if releasing locks early still makes sense
11132 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
else:
11134 # Release all resource locks except those used by the instance
11135 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11136 keep=self.node_secondary_ip.keys())
11138 # Release all node locks while waiting for sync
11139 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11141 # TODO: Can the instance lock be downgraded here? Take the optional disk
11142 # shutdown in the caller into consideration.
11145 # This can fail as the old devices are degraded and _WaitForSync
11146 # does a combined result over all disks, so we don't check its return value
11147 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11148 _WaitForSync(self.lu, self.instance)
11150 # Check all devices manually
11151 self._CheckDevices(self.instance.primary_node, iv_names)
11153 # Step: remove old storage
11154 if not self.early_release:
11155 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11156 self._RemoveOldStorage(self.target_node, iv_names)
11158 def _ExecDrbd8Secondary(self, feedback_fn):
11159 """Replace the secondary node for DRBD 8.
11161 The algorithm for replace is quite complicated:
11162 - for all disks of the instance:
11163 - create new LVs on the new node with same names
11164 - shutdown the drbd device on the old secondary
11165 - disconnect the drbd network on the primary
11166 - create the drbd device on the new secondary
11167 - network attach the drbd on the primary, using an artifice:
11168 the drbd code for Attach() will connect to the network if it
11169 finds a device which is connected to the good local disks but
11170 not network enabled
11171 - wait for sync across all devices
11172 - remove all disks from the old secondary
11174 Failures are not very well handled.

"""
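# Rough outline of what follows, assuming a single-disk instance with
# primary node A, old secondary B and new secondary C (names illustrative):
#
#   1. create data/meta LVs and a standalone DRBD device on C
#   2. shut down the DRBD device on B
#   3. disconnect A's DRBD from the network (standalone)
#   4. point the instance's logical_ids at (A, C, ...) in the config
#   5. call drbd_attach_net on A and C so they find each other and resync
#
# If anything fails before the config update, the freshly allocated DRBD
# minors are released again.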
steps_total = 6
11179 pnode = self.instance.primary_node
11181 # Step: check device activation
11182 self.lu.LogStep(1, steps_total, "Check device existence")
11183 self._CheckDisksExistence([self.instance.primary_node])
11184 self._CheckVolumeGroup([self.instance.primary_node])
11186 # Step: check other node consistency
11187 self.lu.LogStep(2, steps_total, "Check peer consistency")
11188 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11190 # Step: create new storage
11191 self.lu.LogStep(3, steps_total, "Allocate new storage")
11192 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11193 for idx, dev in enumerate(disks):
11194 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11195 (self.new_node, idx))
11196 # we pass force_create=True to force LVM creation
11197 for new_lv in dev.children:
11198 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11199 True, _GetInstanceInfoText(self.instance), False)
11201 # Step 4: DRBD minors and DRBD setup changes
11202 # after this, we must manually remove the drbd minors on both the
11203 # error and the success paths
11204 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11205 minors = self.cfg.AllocateDRBDMinor([self.new_node
11206 for dev in self.instance.disks],
11207 self.instance.name)
11208 logging.debug("Allocated minors %r", minors)
iv_names = {}
11211 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11212 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11213 (self.new_node, idx))
11214 # create new devices on new_node; note that we create two IDs:
11215 # one without port, so the drbd will be activated without
11216 # networking information on the new node at this stage, and one
11217 # with network, for the latter activation in step 4
11218 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11219 if self.instance.primary_node == o_node1:
p_minor = o_minor1
else:
11222 assert self.instance.primary_node == o_node2, "Three-node instance?"
p_minor = o_minor2
11225 new_alone_id = (self.instance.primary_node, self.new_node, None,
11226 p_minor, new_minor, o_secret)
11227 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11228 p_minor, new_minor, o_secret)
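# As an illustration, if the old logical_id was
#   (node_A, node_B, 11000, 0, 1, secret)
# and the new minor allocated on node_C is 3, the two IDs built above are
#   new_alone_id = (node_A, node_C, None,  0, 3, secret)  # no network yet
#   new_net_id   = (node_A, node_C, 11000, 0, 3, secret)  # used later
# (node names and numbers are made up for the example).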
11230 iv_names[idx] = (dev, dev.children, new_net_id)
11231 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
new_net_id)
11233 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11234 logical_id=new_alone_id,
11235 children=dev.children,
size=dev.size,
params={})
11238 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
self.cfg)
try:
11241 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
anno_new_drbd,
11243 _GetInstanceInfoText(self.instance), False)
11244 except errors.GenericError:
11245 self.cfg.ReleaseDRBDMinors(self.instance.name)
raise
11248 # We have new devices, shutdown the drbd on the old secondary
11249 for idx, dev in enumerate(self.instance.disks):
11250 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11251 self.cfg.SetDiskID(dev, self.target_node)
11252 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11253 (dev, self.instance)).fail_msg
if msg:
11255 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11256 " node: %s" % (idx, msg),
11257 hint=("Please cleanup this device manually as"
11258 " soon as possible"))
11260 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11261 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11262 self.instance.disks)[pnode]
11264 msg = result.fail_msg
if msg:
11266 # detaches didn't succeed (unlikely)
11267 self.cfg.ReleaseDRBDMinors(self.instance.name)
11268 raise errors.OpExecError("Can't detach the disks from the network on"
11269 " old node: %s" % (msg,))
11271 # if we managed to detach at least one, we update all the disks of
11272 # the instance to point to the new secondary
11273 self.lu.LogInfo("Updating instance configuration")
11274 for dev, _, new_logical_id in iv_names.itervalues():
11275 dev.logical_id = new_logical_id
11276 self.cfg.SetDiskID(dev, self.instance.primary_node)
11278 self.cfg.Update(self.instance, feedback_fn)
11280 # Release all node locks (the configuration has been updated)
11281 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11283 # and now perform the drbd attach
11284 self.lu.LogInfo("Attaching primary drbds to new secondary"
11285 " (standalone => connected)")
11286 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
self.new_node],
11288 self.node_secondary_ip,
11289 (self.instance.disks, self.instance),
11290 self.instance.name,
False)
11292 for to_node, to_result in result.items():
11293 msg = to_result.fail_msg
if msg:
11295 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
to_node, msg,
11297 hint=("please do a gnt-instance info to see the"
11298 " status of disks"))
11300 cstep = itertools.count(5)
11302 if self.early_release:
11303 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11304 self._RemoveOldStorage(self.target_node, iv_names)
11305 # TODO: Check if releasing locks early still makes sense
11306 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
else:
11308 # Release all resource locks except those used by the instance
11309 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11310 keep=self.node_secondary_ip.keys())
11312 # TODO: Can the instance lock be downgraded here? Take the optional disk
11313 # shutdown in the caller into consideration.
11316 # This can fail as the old devices are degraded and _WaitForSync
11317 # does a combined result over all disks, so we don't check its return value
11318 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11319 _WaitForSync(self.lu, self.instance)
11321 # Check all devices manually
11322 self._CheckDevices(self.instance.primary_node, iv_names)
11324 # Step: remove old storage
11325 if not self.early_release:
11326 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11327 self._RemoveOldStorage(self.target_node, iv_names)
11330 class LURepairNodeStorage(NoHooksLU):
11331 """Repairs the volume group on a node.
11336 def CheckArguments(self):
11337 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11339 storage_type = self.op.storage_type
11341 if (constants.SO_FIX_CONSISTENCY not in
11342 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11343 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11344 " repaired" % storage_type,
11345 errors.ECODE_INVAL)
11347 def ExpandNames(self):
11348 self.needed_locks = {
11349 locking.LEVEL_NODE: [self.op.node_name],
}
11352 def _CheckFaultyDisks(self, instance, node_name):
11353 """Ensure faulty disks abort the opcode or at least warn."""
try:
11355 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
node_name, True):
11357 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11358 " node '%s'" % (instance.name, node_name),
11359 errors.ECODE_STATE)
11360 except errors.OpPrereqError, err:
11361 if self.op.ignore_consistency:
11362 self.proc.LogWarning(str(err.args[0]))
else:
raise
11366 def CheckPrereq(self):
11367 """Check prerequisites.
11370 # Check whether any instance on this node has faulty disks
11371 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11372 if inst.admin_state != constants.ADMINST_UP:
continue
11374 check_nodes = set(inst.all_nodes)
11375 check_nodes.discard(self.op.node_name)
11376 for inst_node_name in check_nodes:
11377 self._CheckFaultyDisks(inst, inst_node_name)
11379 def Exec(self, feedback_fn):
11380 feedback_fn("Repairing storage unit '%s' on %s ..." %
11381 (self.op.name, self.op.node_name))
11383 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11384 result = self.rpc.call_storage_execute(self.op.node_name,
11385 self.op.storage_type, st_args,
self.op.name,
11387 constants.SO_FIX_CONSISTENCY)
11388 result.Raise("Failed to repair storage unit '%s' on %s" %
11389 (self.op.name, self.op.node_name))
11392 class LUNodeEvacuate(NoHooksLU):
11393 """Evacuates instances off a list of nodes.
11398 _MODE2IALLOCATOR = {
11399 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11400 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11401 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
}
11403 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11404 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11405 constants.IALLOCATOR_NEVAC_MODES)
11407 def CheckArguments(self):
11408 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11410 def ExpandNames(self):
11411 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11413 if self.op.remote_node is not None:
11414 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11415 assert self.op.remote_node
11417 if self.op.remote_node == self.op.node_name:
11418 raise errors.OpPrereqError("Can not use evacuated node as a new"
11419 " secondary node", errors.ECODE_INVAL)
11421 if self.op.mode != constants.NODE_EVAC_SEC:
11422 raise errors.OpPrereqError("Without the use of an iallocator only"
11423 " secondary instances can be evacuated",
11424 errors.ECODE_INVAL)
11427 self.share_locks = _ShareAll()
11428 self.needed_locks = {
11429 locking.LEVEL_INSTANCE: [],
11430 locking.LEVEL_NODEGROUP: [],
11431 locking.LEVEL_NODE: [],
}
11434 # Determine nodes (via group) optimistically, needs verification once locks
11435 # have been acquired
11436 self.lock_nodes = self._DetermineNodes()
11438 def _DetermineNodes(self):
11439 """Gets the list of nodes to operate on.
11442 if self.op.remote_node is None:
11443 # Iallocator will choose any node(s) in the same group
11444 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
else:
11446 group_nodes = frozenset([self.op.remote_node])
11448 # Determine nodes to be locked
11449 return set([self.op.node_name]) | group_nodes
11451 def _DetermineInstances(self):
11452 """Builds list of instances to operate on.
11455 assert self.op.mode in constants.NODE_EVAC_MODES
11457 if self.op.mode == constants.NODE_EVAC_PRI:
11458 # Primary instances only
11459 inst_fn = _GetNodePrimaryInstances
11460 assert self.op.remote_node is None, \
11461 "Evacuating primary instances requires iallocator"
11462 elif self.op.mode == constants.NODE_EVAC_SEC:
11463 # Secondary instances only
11464 inst_fn = _GetNodeSecondaryInstances
else:
11467 assert self.op.mode == constants.NODE_EVAC_ALL
11468 inst_fn = _GetNodeInstances
11469 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11471 raise errors.OpPrereqError("Due to an issue with the iallocator"
11472 " interface it is not possible to evacuate"
11473 " all instances at once; specify explicitly"
11474 " whether to evacuate primary or secondary"
" instances",
11476 errors.ECODE_INVAL)
11478 return inst_fn(self.cfg, self.op.node_name)
11480 def DeclareLocks(self, level):
11481 if level == locking.LEVEL_INSTANCE:
11482 # Lock instances optimistically, needs verification once node and group
11483 # locks have been acquired
11484 self.needed_locks[locking.LEVEL_INSTANCE] = \
11485 set(i.name for i in self._DetermineInstances())
11487 elif level == locking.LEVEL_NODEGROUP:
11488 # Lock node groups for all potential target nodes optimistically, needs
11489 # verification once nodes have been acquired
11490 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11491 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11493 elif level == locking.LEVEL_NODE:
11494 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11496 def CheckPrereq(self):
11498 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11499 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11500 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11502 need_nodes = self._DetermineNodes()
11504 if not owned_nodes.issuperset(need_nodes):
11505 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11506 " locks were acquired, current nodes are"
11507 " '%s', used to be '%s'; retry the"
" operation" %
11509 (self.op.node_name,
11510 utils.CommaJoin(need_nodes),
11511 utils.CommaJoin(owned_nodes)),
11512 errors.ECODE_STATE)
11514 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11515 if owned_groups != wanted_groups:
11516 raise errors.OpExecError("Node groups changed since locks were acquired,"
11517 " current groups are '%s', used to be '%s';"
11518 " retry the operation" %
11519 (utils.CommaJoin(wanted_groups),
11520 utils.CommaJoin(owned_groups)))
11522 # Determine affected instances
11523 self.instances = self._DetermineInstances()
11524 self.instance_names = [i.name for i in self.instances]
11526 if set(self.instance_names) != owned_instances:
11527 raise errors.OpExecError("Instances on node '%s' changed since locks"
11528 " were acquired, current instances are '%s',"
11529 " used to be '%s'; retry the operation" %
11530 (self.op.node_name,
11531 utils.CommaJoin(self.instance_names),
11532 utils.CommaJoin(owned_instances)))
11534 if self.instance_names:
11535 self.LogInfo("Evacuating instances from node '%s': %s",
self.op.node_name,
11537 utils.CommaJoin(utils.NiceSort(self.instance_names)))
else:
11539 self.LogInfo("No instances to evacuate from node '%s'",
self.op.node_name)
11542 if self.op.remote_node is not None:
11543 for i in self.instances:
11544 if i.primary_node == self.op.remote_node:
11545 raise errors.OpPrereqError("Node %s is the primary node of"
11546 " instance %s, cannot use it as"
11548 (self.op.remote_node, i.name),
11549 errors.ECODE_INVAL)
11551 def Exec(self, feedback_fn):
11552 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11554 if not self.instance_names:
11555 # No instances to evacuate
jobs = []
11558 elif self.op.iallocator is not None:
11559 # TODO: Implement relocation to other group
11560 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11561 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
11562 instances=list(self.instance_names))
11563 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11565 ial.Run(self.op.iallocator)
11567 if not ial.success:
11568 raise errors.OpPrereqError("Can't compute node evacuation using"
11569 " iallocator '%s': %s" %
11570 (self.op.iallocator, ial.info),
11571 errors.ECODE_NORES)
11573 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11575 elif self.op.remote_node is not None:
11576 assert self.op.mode == constants.NODE_EVAC_SEC
jobs = [
11578 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11579 remote_node=self.op.remote_node,
disks=[],
11581 mode=constants.REPLACE_DISK_CHG,
11582 early_release=self.op.early_release)]
11583 for instance_name in self.instance_names
]
else:
11587 raise errors.ProgrammerError("No iallocator or remote node")
11589 return ResultWithJobs(jobs)
11592 def _SetOpEarlyRelease(early_release, op):
11593 """Sets C{early_release} flag on opcodes if available.

"""
try:
11597 op.early_release = early_release
11598 except AttributeError:
11599 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
return op
11604 def _NodeEvacDest(use_nodes, group, nodes):
11605 """Returns group or nodes depending on caller's choice.

"""
if use_nodes:
11609 return utils.CommaJoin(nodes)
else:
return group
11614 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11615 """Unpacks the result of change-group and node-evacuate iallocator requests.
11617 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11618 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11620 @type lu: L{LogicalUnit}
11621 @param lu: Logical unit instance
11622 @type alloc_result: tuple/list
11623 @param alloc_result: Result from iallocator
11624 @type early_release: bool
11625 @param early_release: Whether to release locks early if possible
11626 @type use_nodes: bool
11627 @param use_nodes: Whether to display node names instead of groups
11630 (moved, failed, jobs) = alloc_result
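# A minimal example of the structure expected from the iallocator (all
# values invented): moved lists successful relocations, failed lists
# (instance, reason) pairs, and jobs is a list of serialized opcode lists:
#   moved  = [("inst1", "group1", ["node3", "node4"])]
#   failed = [("inst2", "not enough memory")]
#   jobs   = [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}, ...]]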
if failed:
11633 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11634 for (name, reason) in failed)
11635 lu.LogWarning("Unable to evacuate instances %s", failreason)
11636 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
if moved:
11639 lu.LogInfo("Instances to be moved: %s",
11640 utils.CommaJoin("%s (to %s)" %
11641 (name, _NodeEvacDest(use_nodes, group, nodes))
11642 for (name, group, nodes) in moved))
11644 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11645 map(opcodes.OpCode.LoadOpCode, ops))
for ops in jobs]
11649 class LUInstanceGrowDisk(LogicalUnit):
11650 """Grow a disk of an instance.
11653 HPATH = "disk-grow"
11654 HTYPE = constants.HTYPE_INSTANCE
11657 def ExpandNames(self):
11658 self._ExpandAndLockInstance()
11659 self.needed_locks[locking.LEVEL_NODE] = []
11660 self.needed_locks[locking.LEVEL_NODE_RES] = []
11661 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11662 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11664 def DeclareLocks(self, level):
11665 if level == locking.LEVEL_NODE:
11666 self._LockInstancesNodes()
11667 elif level == locking.LEVEL_NODE_RES:
11669 self.needed_locks[locking.LEVEL_NODE_RES] = \
11670 self.needed_locks[locking.LEVEL_NODE][:]
11672 def BuildHooksEnv(self):
11673 """Build hooks env.
11675 This runs on the master, the primary and all the secondaries.
"""
env = {
11679 "DISK": self.op.disk,
11680 "AMOUNT": self.op.amount,
11681 "ABSOLUTE": self.op.absolute,
}
11683 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
return env
11686 def BuildHooksNodes(self):
11687 """Build hooks nodes.

"""
11690 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return (nl, nl)
11693 def CheckPrereq(self):
11694 """Check prerequisites.
11696 This checks that the instance is in the cluster.
11699 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11700 assert instance is not None, \
11701 "Cannot retrieve locked instance %s" % self.op.instance_name
11702 nodenames = list(instance.all_nodes)
11703 for node in nodenames:
11704 _CheckNodeOnline(self, node)
11706 self.instance = instance
11708 if instance.disk_template not in constants.DTS_GROWABLE:
11709 raise errors.OpPrereqError("Instance's disk layout does not support"
11710 " growing", errors.ECODE_INVAL)
11712 self.disk = instance.FindDisk(self.op.disk)
11714 if self.op.absolute:
11715 self.target = self.op.amount
11716 self.delta = self.target - self.disk.size
if self.delta < 0:
11718 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11719 "current disk size (%s)" %
11720 (utils.FormatUnit(self.target, "h"),
11721 utils.FormatUnit(self.disk.size, "h")),
11722 errors.ECODE_STATE)
else:
11724 self.delta = self.op.amount
11725 self.target = self.disk.size + self.delta
if self.delta < 0:
11727 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11728 utils.FormatUnit(self.delta, "h"),
11729 errors.ECODE_INVAL)
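# Worked example (made-up numbers) for a 10240 MB disk:
#   relative request, amount=2048:  delta = 2048, target = 12288
#   absolute request, amount=12288: delta = 12288 - 10240 = 2048,
#                                   target = 12288
# Either way a negative delta is rejected above.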
11731 if instance.disk_template not in (constants.DT_FILE,
11732 constants.DT_SHARED_FILE,
constants.DT_RBD):
11734 # TODO: check the free disk space for file, when that feature will be
# supported
11736 _CheckNodesFreeDiskPerVG(self, nodenames,
11737 self.disk.ComputeGrowth(self.delta))
11739 def Exec(self, feedback_fn):
11740 """Execute disk grow.

"""
11743 instance = self.instance
disk = self.disk
11746 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11747 assert (self.owned_locks(locking.LEVEL_NODE) ==
11748 self.owned_locks(locking.LEVEL_NODE_RES))
11750 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
if not disks_ok:
11752 raise errors.OpExecError("Cannot activate block device to grow")
11754 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11755 (self.op.disk, instance.name,
11756 utils.FormatUnit(self.delta, "h"),
11757 utils.FormatUnit(self.target, "h")))
11759 # First run all grow ops in dry-run mode
11760 for node in instance.all_nodes:
11761 self.cfg.SetDiskID(disk, node)
11762 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
True, True)
11764 result.Raise("Grow request failed to node %s" % node)
11766 # We know that (as far as we can test) operations across different
11767 # nodes will succeed, time to run it for real on the backing storage
11768 for node in instance.all_nodes:
11769 self.cfg.SetDiskID(disk, node)
11770 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
False, True)
11772 result.Raise("Grow request failed to node %s" % node)
11774 # And now execute it for logical storage, on the primary node
11775 node = instance.primary_node
11776 self.cfg.SetDiskID(disk, node)
11777 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
False, False)
11779 result.Raise("Grow request failed to node %s" % node)
11781 disk.RecordGrow(self.delta)
11782 self.cfg.Update(instance, feedback_fn)
11784 # Changes have been recorded, release node lock
11785 _ReleaseLocks(self, locking.LEVEL_NODE)
11787 # Downgrade lock while waiting for sync
11788 self.glm.downgrade(locking.LEVEL_INSTANCE)
11790 if self.op.wait_for_sync:
11791 disk_abort = not _WaitForSync(self, instance, disks=[disk])
if disk_abort:
11793 self.proc.LogWarning("Disk sync-ing has not returned a good"
11794 " status; please check the instance")
11795 if instance.admin_state != constants.ADMINST_UP:
11796 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11797 elif instance.admin_state != constants.ADMINST_UP:
11798 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11799 " not supposed to be running because no wait for"
11800 " sync mode was requested")
11802 assert self.owned_locks(locking.LEVEL_NODE_RES)
11803 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11806 class LUInstanceQueryData(NoHooksLU):
11807 """Query runtime instance data.
11812 def ExpandNames(self):
11813 self.needed_locks = {}
11815 # Use locking if requested or when non-static information is wanted
11816 if not (self.op.static or self.op.use_locking):
11817 self.LogWarning("Non-static data requested, locks need to be acquired")
11818 self.op.use_locking = True
11820 if self.op.instances or not self.op.use_locking:
11821 # Expand instance names right here
11822 self.wanted_names = _GetWantedInstances(self, self.op.instances)
else:
11824 # Will use acquired locks
11825 self.wanted_names = None
11827 if self.op.use_locking:
11828 self.share_locks = _ShareAll()
11830 if self.wanted_names is None:
11831 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
else:
11833 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11835 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11836 self.needed_locks[locking.LEVEL_NODE] = []
11837 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11839 def DeclareLocks(self, level):
11840 if self.op.use_locking:
11841 if level == locking.LEVEL_NODEGROUP:
11842 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11844 # Lock all groups used by instances optimistically; this requires going
11845 # via the node before it's locked, requiring verification later on
11846 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11847 frozenset(group_uuid
11848 for instance_name in owned_instances
for group_uuid in
11850 self.cfg.GetInstanceNodeGroups(instance_name))
11852 elif level == locking.LEVEL_NODE:
11853 self._LockInstancesNodes()
11855 def CheckPrereq(self):
11856 """Check prerequisites.
11858 This only checks the optional instance list against the existing names.
11861 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11862 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11863 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11865 if self.wanted_names is None:
11866 assert self.op.use_locking, "Locking was not used"
11867 self.wanted_names = owned_instances
11869 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11871 if self.op.use_locking:
11872 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
None)
else:
11875 assert not (owned_instances or owned_groups or owned_nodes)
11877 self.wanted_instances = instances.values()
11879 def _ComputeBlockdevStatus(self, node, instance, dev):
11880 """Returns the status of a block device
11883 if self.op.static or not node:
return None
11886 self.cfg.SetDiskID(dev, node)
11888 result = self.rpc.call_blockdev_find(node, dev)
if result.offline:
return None
11892 result.Raise("Can't compute disk status for %s" % instance.name)
11894 status = result.payload
if status is None:
return None
11898 return (status.dev_path, status.major, status.minor,
11899 status.sync_percent, status.estimated_time,
11900 status.is_degraded, status.ldisk_status)
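# The tuple returned above might look like (values invented):
#   ("/dev/drbd0", 147, 0, 99.5, 12, False, constants.LDS_OKAY)
# i.e. (dev_path, major, minor, sync_percent, estimated_time,
#       is_degraded, ldisk_status).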
11902 def _ComputeDiskStatus(self, instance, snode, dev):
11903 """Compute block device status.
11906 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11908 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11910 def _ComputeDiskStatusInner(self, instance, snode, dev):
11911 """Compute block device status.
11913 @attention: The device has to be annotated already.
11916 if dev.dev_type in constants.LDS_DRBD:
11917 # we change the snode then (otherwise we use the one passed in)
11918 if dev.logical_id[0] == instance.primary_node:
11919 snode = dev.logical_id[1]
else:
11921 snode = dev.logical_id[0]
11923 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11925 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11928 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11935 "iv_name": dev.iv_name,
11936 "dev_type": dev.dev_type,
11937 "logical_id": dev.logical_id,
11938 "physical_id": dev.physical_id,
11939 "pstatus": dev_pstatus,
11940 "sstatus": dev_sstatus,
11941 "children": dev_children,
11946 def Exec(self, feedback_fn):
11947 """Gather and return data"""
11950 cluster = self.cfg.GetClusterInfo()
11952 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11953 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11955 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11956 for node in nodes.values()))
11958 group2name_fn = lambda uuid: groups[uuid].name
11960 for instance in self.wanted_instances:
11961 pnode = nodes[instance.primary_node]
11963 if self.op.static or pnode.offline:
11964 remote_state = None
11966 self.LogWarning("Primary node %s is marked offline, returning static"
11967 " information only for instance %s" %
11968 (pnode.name, instance.name))
11970 remote_info = self.rpc.call_instance_info(instance.primary_node,
11972 instance.hypervisor)
11973 remote_info.Raise("Error checking node %s" % instance.primary_node)
11974 remote_info = remote_info.payload
11975 if remote_info and "state" in remote_info:
11976 remote_state = "up"
11978 if instance.admin_state == constants.ADMINST_UP:
11979 remote_state = "down"
11981 remote_state = instance.admin_state
11983 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11986 snodes_group_uuids = [nodes[snode_name].group
11987 for snode_name in instance.secondary_nodes]
11989 result[instance.name] = {
11990 "name": instance.name,
11991 "config_state": instance.admin_state,
11992 "run_state": remote_state,
11993 "pnode": instance.primary_node,
11994 "pnode_group_uuid": pnode.group,
11995 "pnode_group_name": group2name_fn(pnode.group),
11996 "snodes": instance.secondary_nodes,
11997 "snodes_group_uuids": snodes_group_uuids,
11998 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12000 # this happens to be the same format used for hooks
12001 "nics": _NICListToTuple(self, instance.nics),
12002 "disk_template": instance.disk_template,
12004 "hypervisor": instance.hypervisor,
12005 "network_port": instance.network_port,
12006 "hv_instance": instance.hvparams,
12007 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12008 "be_instance": instance.beparams,
12009 "be_actual": cluster.FillBE(instance),
12010 "os_instance": instance.osparams,
12011 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12012 "serial_no": instance.serial_no,
12013 "mtime": instance.mtime,
12014 "ctime": instance.ctime,
12015 "uuid": instance.uuid,
12021 def PrepareContainerMods(mods, private_fn):
12022 """Prepares a list of container modifications by adding a private data field.
12024 @type mods: list of tuples; (operation, index, parameters)
12025 @param mods: List of modifications
12026 @type private_fn: callable or None
12027 @param private_fn: Callable for constructing a private data field for a
modification
@rtype: list

"""
12032 if private_fn is None:
fn = lambda: None
else:
fn = private_fn
12037 return [(op, idx, params, fn()) for (op, idx, params) in mods]
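# Example of the transformation performed by PrepareContainerMods (values
# are illustrative only):
#
#   mods = [(constants.DDM_ADD, -1, {"size": 1024})]
#   PrepareContainerMods(mods, None)
#     => [(constants.DDM_ADD, -1, {"size": 1024}, None)]
#   PrepareContainerMods(mods, _InstNicModPrivate)
#     => [(constants.DDM_ADD, -1, {...}, <_InstNicModPrivate instance>)]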
12040 #: Type description for changes as returned by L{ApplyContainerMods}'s
12042 _TApplyContModsCbChanges = \
12043 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12044 ht.TNonEmptyString,
12049 def ApplyContainerMods(kind, container, chgdesc, mods,
12050 create_fn, modify_fn, remove_fn):
12051 """Applies descriptions in C{mods} to C{container}.
12054 @param kind: One-word item description
12055 @type container: list
12056 @param container: Container to modify
12057 @type chgdesc: None or list
12058 @param chgdesc: List of applied changes
12060 @param mods: Modifications as returned by L{PrepareContainerMods}
12061 @type create_fn: callable
12062 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12063 receives absolute item index, parameters and private data object as added
12064 by L{PrepareContainerMods}, returns tuple containing new item and changes
12066 @type modify_fn: callable
12067 @param modify_fn: Callback for modifying an existing item
12068 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12069 and private data object as added by L{PrepareContainerMods}, returns
12071 @type remove_fn: callable
12072 @param remove_fn: Callback on removing item; receives absolute item index,
12073 item and private data object as added by L{PrepareContainerMods}

"""
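# In short (index semantics as implemented below): an ADD with index -1
# appends to the container, an ADD with index i inserts before position i,
# while REMOVE/MODIFY with index -1 address the last existing item and any
# other index must already exist.  Each callback may return a list of
# (name, change) pairs which is accumulated into chgdesc.  For example:
#   ApplyContainerMods("NIC", nics, chgdesc,
#                      [(constants.DDM_REMOVE, -1, {}, None)],
#                      None, None, None)   # drops the last NIC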
12076 for (op, idx, params, private) in mods:
if idx == -1:
# Append
12079 absidx = len(container) - 1
elif idx < 0:
12081 raise IndexError("Not accepting negative indices other than -1")
12082 elif idx > len(container):
12083 raise IndexError("Got %s index %s, but there are only %s" %
12084 (kind, idx, len(container)))
else:
absidx = idx
changes = None
12090 if op == constants.DDM_ADD:
12091 # Calculate where item will be added
if idx == -1:
12093 addidx = len(container)
else:
addidx = idx
12097 if create_fn is None:
item = params
else:
12100 (item, changes) = create_fn(addidx, params, private)
if idx == -1:
12103 container.append(item)
else:
12106 assert idx <= len(container)
12107 # list.insert does so before the specified index
12108 container.insert(idx, item)
else:
12110 # Retrieve existing item
try:
12112 item = container[absidx]
except IndexError:
12114 raise IndexError("Invalid %s index %s" % (kind, idx))
12116 if op == constants.DDM_REMOVE:
assert not params
12119 if remove_fn is not None:
12120 remove_fn(absidx, item, private)
12122 changes = [("%s/%s" % (kind, absidx), "remove")]
12124 assert container[absidx] == item
12125 del container[absidx]
12126 elif op == constants.DDM_MODIFY:
12127 if modify_fn is not None:
12128 changes = modify_fn(absidx, item, params, private)
else:
12130 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12132 assert _TApplyContModsCbChanges(changes)
12134 if not (chgdesc is None or changes is None):
12135 chgdesc.extend(changes)
12138 def _UpdateIvNames(base_index, disks):
12139 """Updates the C{iv_name} attribute of disks.
12141 @type disks: list of L{objects.Disk}
12144 for (idx, disk) in enumerate(disks):
12145 disk.iv_name = "disk/%s" % (base_index + idx, )
12148 class _InstNicModPrivate:
12149 """Data structure for network interface modifications.
12151 Used by L{LUInstanceSetParams}.
12154 def __init__(self):
self.params = None
self.filled = None
12159 class LUInstanceSetParams(LogicalUnit):
12160 """Modifies an instance's parameters.

"""
12163 HPATH = "instance-modify"
12164 HTYPE = constants.HTYPE_INSTANCE
12168 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12169 assert ht.TList(mods)
12170 assert not mods or len(mods[0]) in (2, 3)
12172 if mods and len(mods[0]) == 2:
result = []
addremove = 0
12176 for op, params in mods:
12177 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12178 result.append((op, -1, params))
addremove += 1
if addremove > 1:
12182 raise errors.OpPrereqError("Only one %s add or remove operation is"
12183 " supported at a time" % kind,
12184 errors.ECODE_INVAL)
else:
12186 result.append((constants.DDM_MODIFY, op, params))
12188 assert verify_fn(result)
else:
result = mods
return result
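# Example of the upgrade performed above (old-style two-element mods
# become three-element mods; values invented):
#   [(constants.DDM_ADD, {"size": 1024})]
#     => [(constants.DDM_ADD, -1, {"size": 1024})]
#   [(0, {"mode": "ro"})]
#     => [(constants.DDM_MODIFY, 0, {"mode": "ro"})]
# while more than one add/remove in the same list is rejected.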
12195 def _CheckMods(kind, mods, key_types, item_fn):
12196 """Ensures requested disk/NIC modifications are valid.
12199 for (op, _, params) in mods:
12200 assert ht.TDict(params)
12202 utils.ForceDictType(params, key_types)
12204 if op == constants.DDM_REMOVE:
if params:
12206 raise errors.OpPrereqError("No settings should be passed when"
12207 " removing a %s" % kind,
12208 errors.ECODE_INVAL)
12209 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12210 item_fn(op, params)
12212 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12215 def _VerifyDiskModification(op, params):
12216 """Verifies a disk modification.
12219 if op == constants.DDM_ADD:
12220 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12221 if mode not in constants.DISK_ACCESS_SET:
12222 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12223 errors.ECODE_INVAL)
12225 size = params.get(constants.IDISK_SIZE, None)
if size is None:
12227 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12228 constants.IDISK_SIZE, errors.ECODE_INVAL)
try:
size = int(size)
12232 except (TypeError, ValueError), err:
12233 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12234 errors.ECODE_INVAL)
12236 params[constants.IDISK_SIZE] = size
12238 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12239 raise errors.OpPrereqError("Disk size change not possible, use"
12240 " grow-disk", errors.ECODE_INVAL)
12243 def _VerifyNicModification(op, params):
12244 """Verifies a network interface modification.
12247 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12248 ip = params.get(constants.INIC_IP, None)
if ip is None:
pass
12251 elif ip.lower() == constants.VALUE_NONE:
12252 params[constants.INIC_IP] = None
12253 elif not netutils.IPAddress.IsValid(ip):
12254 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12255 errors.ECODE_INVAL)
12257 bridge = params.get("bridge", None)
12258 link = params.get(constants.INIC_LINK, None)
12259 if bridge and link:
12260 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12261 " at the same time", errors.ECODE_INVAL)
12262 elif bridge and bridge.lower() == constants.VALUE_NONE:
12263 params["bridge"] = None
12264 elif link and link.lower() == constants.VALUE_NONE:
12265 params[constants.INIC_LINK] = None
12267 if op == constants.DDM_ADD:
12268 macaddr = params.get(constants.INIC_MAC, None)
12269 if macaddr is None:
12270 params[constants.INIC_MAC] = constants.VALUE_AUTO
else:
12272 if constants.INIC_MAC in params:
12273 macaddr = params[constants.INIC_MAC]
12274 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12275 macaddr = utils.NormalizeAndValidateMac(macaddr)
12277 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12278 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12279 " modifying an existing NIC",
12280 errors.ECODE_INVAL)
12282 def CheckArguments(self):
12283 if not (self.op.nics or self.op.disks or self.op.disk_template or
12284 self.op.hvparams or self.op.beparams or self.op.os_name or
12285 self.op.offline is not None or self.op.runtime_mem):
12286 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12288 if self.op.hvparams:
12289 _CheckGlobalHvParams(self.op.hvparams)
12291 self.op.disks = self._UpgradeDiskNicMods(
12292 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12293 self.op.nics = self._UpgradeDiskNicMods(
12294 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12296 # Check disk modifications
12297 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12298 self._VerifyDiskModification)
12300 if self.op.disks and self.op.disk_template is not None:
12301 raise errors.OpPrereqError("Disk template conversion and other disk"
12302 " changes not supported at the same time",
12303 errors.ECODE_INVAL)
12305 if (self.op.disk_template and
12306 self.op.disk_template in constants.DTS_INT_MIRROR and
12307 self.op.remote_node is None):
12308 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12309 " one requires specifying a secondary node",
12310 errors.ECODE_INVAL)
12312 # Check NIC modifications
12313 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12314 self._VerifyNicModification)
12316 def ExpandNames(self):
12317 self._ExpandAndLockInstance()
12318 # Can't even acquire node locks in shared mode as upcoming changes in
12319 # Ganeti 2.6 will start to modify the node object on disk conversion
12320 self.needed_locks[locking.LEVEL_NODE] = []
12321 self.needed_locks[locking.LEVEL_NODE_RES] = []
12322 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12324 def DeclareLocks(self, level):
12325 # TODO: Acquire group lock in shared mode (disk parameters)
12326 if level == locking.LEVEL_NODE:
12327 self._LockInstancesNodes()
12328 if self.op.disk_template and self.op.remote_node:
12329 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12330 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12331 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12333 self.needed_locks[locking.LEVEL_NODE_RES] = \
12334 self.needed_locks[locking.LEVEL_NODE][:]
12336 def BuildHooksEnv(self):
12337 """Build hooks env.
12339 This runs on the master, primary and secondaries.
"""
args = {}
12343 if constants.BE_MINMEM in self.be_new:
12344 args["minmem"] = self.be_new[constants.BE_MINMEM]
12345 if constants.BE_MAXMEM in self.be_new:
12346 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12347 if constants.BE_VCPUS in self.be_new:
12348 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12349 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12350 # information at all.
12352 if self._new_nics is not None:
nics = []
12355 for nic in self._new_nics:
12356 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12357 mode = nicparams[constants.NIC_MODE]
12358 link = nicparams[constants.NIC_LINK]
12359 nics.append((nic.ip, nic.mac, mode, link))
12361 args["nics"] = nics
12363 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12364 if self.op.disk_template:
12365 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12366 if self.op.runtime_mem:
12367 env["RUNTIME_MEMORY"] = self.op.runtime_mem
return env
12371 def BuildHooksNodes(self):
12372 """Build hooks nodes.

"""
12375 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return (nl, nl)
12378 def _PrepareNicModification(self, params, private, old_ip, old_params,
12380 update_params_dict = dict([(key, params[key])
12381 for key in constants.NICS_PARAMETERS
12384 if "bridge" in params:
12385 update_params_dict[constants.NIC_LINK] = params["bridge"]
12387 new_params = _GetUpdatedParams(old_params, update_params_dict)
12388 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12390 new_filled_params = cluster.SimpleFillNIC(new_params)
12391 objects.NIC.CheckParameterSyntax(new_filled_params)
12393 new_mode = new_filled_params[constants.NIC_MODE]
12394 if new_mode == constants.NIC_MODE_BRIDGED:
12395 bridge = new_filled_params[constants.NIC_LINK]
12396 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
if msg:
12398 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
if self.op.force:
12400 self.warn.append(msg)
else:
12402 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12404 elif new_mode == constants.NIC_MODE_ROUTED:
12405 ip = params.get(constants.INIC_IP, old_ip)
if ip is None:
12407 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12408 " on a routed NIC", errors.ECODE_INVAL)
12410 if constants.INIC_MAC in params:
12411 mac = params[constants.INIC_MAC]
12413 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12414 errors.ECODE_INVAL)
12415 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12416 # otherwise generate the MAC address
12417 params[constants.INIC_MAC] = \
12418 self.cfg.GenerateMAC(self.proc.GetECId())
else:
12420 # or validate/reserve the current one
try:
12422 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12423 except errors.ReservationError:
12424 raise errors.OpPrereqError("MAC address '%s' already in use"
12425 " in cluster" % mac,
12426 errors.ECODE_NOTUNIQUE)
12428 private.params = new_params
12429 private.filled = new_filled_params
12431 def CheckPrereq(self):
12432 """Check prerequisites.
12434 This only checks the instance list against the existing names.
12437 # checking the new params on the primary/secondary nodes
12439 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12440 cluster = self.cluster = self.cfg.GetClusterInfo()
12441 assert self.instance is not None, \
12442 "Cannot retrieve locked instance %s" % self.op.instance_name
12443 pnode = instance.primary_node
12444 nodelist = list(instance.all_nodes)
12445 pnode_info = self.cfg.GetNodeInfo(pnode)
12446 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12448 # Prepare disk/NIC modifications
12449 self.diskmod = PrepareContainerMods(self.op.disks, None)
12450 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12453 if self.op.os_name and not self.op.force:
12454 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12455 self.op.force_variant)
12456 instance_os = self.op.os_name
12458 instance_os = instance.os
12460 assert not (self.op.disk_template and self.op.disks), \
12461 "Can't modify disk template and apply disk changes at the same time"
12463 if self.op.disk_template:
12464 if instance.disk_template == self.op.disk_template:
12465 raise errors.OpPrereqError("Instance already has disk template %s" %
12466 instance.disk_template, errors.ECODE_INVAL)
12468 if (instance.disk_template,
12469 self.op.disk_template) not in self._DISK_CONVERSIONS:
12470 raise errors.OpPrereqError("Unsupported disk template conversion from"
12471 " %s to %s" % (instance.disk_template,
12472 self.op.disk_template),
12473 errors.ECODE_INVAL)
12474 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12475 msg="cannot change disk template")
12476 if self.op.disk_template in constants.DTS_INT_MIRROR:
12477 if self.op.remote_node == pnode:
12478 raise errors.OpPrereqError("Given new secondary node %s is the same"
12479 " as the primary node of the instance" %
12480 self.op.remote_node, errors.ECODE_STATE)
12481 _CheckNodeOnline(self, self.op.remote_node)
12482 _CheckNodeNotDrained(self, self.op.remote_node)
12483 # FIXME: here we assume that the old instance type is DT_PLAIN
12484 assert instance.disk_template == constants.DT_PLAIN
12485 disks = [{constants.IDISK_SIZE: d.size,
12486 constants.IDISK_VG: d.logical_id[0]}
12487 for d in instance.disks]
12488 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12489 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12491 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12492 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12493 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
12495 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12496 ignore=self.op.ignore_ipolicy)
12497 if pnode_info.group != snode_info.group:
12498 self.LogWarning("The primary and secondary nodes are in two"
12499 " different node groups; the disk parameters"
12500 " from the first disk's node group will be"
12503 # hvparams processing
12504 if self.op.hvparams:
12505 hv_type = instance.hypervisor
12506 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12507 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12508 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12511 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12512 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12513 self.hv_proposed = self.hv_new = hv_new # the new actual values
12514 self.hv_inst = i_hvdict # the new dict (without defaults)
12516 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12518 self.hv_new = self.hv_inst = {}
12520 # beparams processing
12521 if self.op.beparams:
12522 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12524 objects.UpgradeBeParams(i_bedict)
12525 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12526 be_new = cluster.SimpleFillBE(i_bedict)
12527 self.be_proposed = self.be_new = be_new # the new actual values
12528 self.be_inst = i_bedict # the new dict (without defaults)
12530 self.be_new = self.be_inst = {}
12531 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12532 be_old = cluster.FillBE(instance)
12534 # CPU param validation -- checking every time a parameter is
12535 # changed to cover all cases where either CPU mask or vcpus have
12537 if (constants.BE_VCPUS in self.be_proposed and
12538 constants.HV_CPU_MASK in self.hv_proposed):
cpu_list = \
12540 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12541 # Verify mask is consistent with number of vCPUs. Can skip this
12542 # test if only 1 entry in the CPU mask, which means same mask
12543 # is applied to all vCPUs.
12544 if (len(cpu_list) > 1 and
12545 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12546 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
" CPU mask [%s]" %
12548 (self.be_proposed[constants.BE_VCPUS],
12549 self.hv_proposed[constants.HV_CPU_MASK]),
12550 errors.ECODE_INVAL)
12552 # Only perform this test if a new CPU mask is given
12553 if constants.HV_CPU_MASK in self.hv_new:
12554 # Calculate the largest CPU number requested
12555 max_requested_cpu = max(map(max, cpu_list))
12556 # Check that all of the instance's nodes have enough physical CPUs to
12557 # satisfy the requested CPU mask
12558 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12559 max_requested_cpu + 1, instance.hypervisor)
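# Worked example (made-up values): with a proposed CPU mask of
# "0-1:2:3:6-7" (four entries) and BE_VCPUS == 4 the count check above
# passes, and the highest CPU referenced is 7, so every node of the
# instance must expose at least 8 physical CPUs.  A mask with a single
# entry such as "0-3" applies to all vCPUs and skips the per-vCPU check.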
12561 # osparams processing
12562 if self.op.osparams:
12563 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12564 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12565 self.os_inst = i_osdict # the new dict (without defaults)
else:
self.os_inst = {}
self.warn = []
12571 #TODO(dynmem): do the appropriate check involving MINMEM
12572 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12573 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12574 mem_check_list = [pnode]
12575 if be_new[constants.BE_AUTO_BALANCE]:
12576 # either we changed auto_balance to yes or it was from before
12577 mem_check_list.extend(instance.secondary_nodes)
12578 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12579 instance.hypervisor)
12580 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12581 [instance.hypervisor])
12582 pninfo = nodeinfo[pnode]
12583 msg = pninfo.fail_msg
if msg:
12585 # Assume the primary node is unreachable and go ahead
12586 self.warn.append("Can't get info from primary node %s: %s" %
(pnode, msg))
else:
12589 (_, _, (pnhvinfo, )) = pninfo.payload
12590 if not isinstance(pnhvinfo.get("memory_free", None), int):
12591 self.warn.append("Node data from primary node %s doesn't contain"
12592 " free memory information" % pnode)
12593 elif instance_info.fail_msg:
12594 self.warn.append("Can't get instance runtime information: %s" %
12595 instance_info.fail_msg)
12597 if instance_info.payload:
12598 current_mem = int(instance_info.payload["memory"])
else:
12600 # Assume instance not running
12601 # (there is a slight race condition here, but it's not very
12602 # probable, and we have no other way to check)
12603 # TODO: Describe race condition
current_mem = 0
12605 #TODO(dynmem): do the appropriate check involving MINMEM
12606 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12607 pnhvinfo["memory_free"])
if miss_mem > 0:
12609 raise errors.OpPrereqError("This change will prevent the instance"
12610 " from starting, due to %d MB of memory"
12611 " missing on its primary node" %
12612 miss_mem, errors.ECODE_NORES)
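# Example with invented numbers: raising BE_MAXMEM to 4096 MB while the
# instance currently uses 2048 MB and the primary node reports 1024 MB
# free gives miss_mem = 4096 - 2048 - 1024 = 1024 > 0, so the change is
# refused unless the operation is forced (which skips this whole check).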
12614 if be_new[constants.BE_AUTO_BALANCE]:
12615 for node, nres in nodeinfo.items():
12616 if node not in instance.secondary_nodes:
continue
12618 nres.Raise("Can't get info from secondary node %s" % node,
12619 prereq=True, ecode=errors.ECODE_STATE)
12620 (_, _, (nhvinfo, )) = nres.payload
12621 if not isinstance(nhvinfo.get("memory_free", None), int):
12622 raise errors.OpPrereqError("Secondary node %s didn't return free"
12623 " memory information" % node,
12624 errors.ECODE_STATE)
12625 #TODO(dynmem): do the appropriate check involving MINMEM
12626 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12627 raise errors.OpPrereqError("This change will prevent the instance"
12628 " from failover to its secondary node"
12629 " %s, due to not enough memory" % node,
12630 errors.ECODE_STATE)
12632 if self.op.runtime_mem:
12633 remote_info = self.rpc.call_instance_info(instance.primary_node,
instance.name,
12635 instance.hypervisor)
12636 remote_info.Raise("Error checking node %s" % instance.primary_node)
12637 if not remote_info.payload: # not running already
12638 raise errors.OpPrereqError("Instance %s is not running" %
12639 instance.name, errors.ECODE_STATE)
12641 current_memory = remote_info.payload["memory"]
12642 if (not self.op.force and
12643 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12644 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12645 raise errors.OpPrereqError("Instance %s must have memory between %d"
12646 " and %d MB of memory unless --force is"
" given" %
(instance.name,
12649 self.be_proposed[constants.BE_MINMEM],
12650 self.be_proposed[constants.BE_MAXMEM]),
12651 errors.ECODE_INVAL)
12653 if self.op.runtime_mem > current_memory:
12654 _CheckNodeFreeMemory(self, instance.primary_node,
12655 "ballooning memory for instance %s" %
instance.name,
12657 self.op.runtime_mem - current_memory,
12658 instance.hypervisor)
12660 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12661 raise errors.OpPrereqError("Disk operations not supported for"
12662 " diskless instances", errors.ECODE_INVAL)
12664 def _PrepareNicCreate(_, params, private):
12665 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12666 return (None, None)
12668 def _PrepareNicMod(_, nic, params, private):
12669 self._PrepareNicModification(params, private, nic.ip,
12670 nic.nicparams, cluster, pnode)
12673 # Verify NIC changes (operating on copy)
12674 nics = instance.nics[:]
12675 ApplyContainerMods("NIC", nics, None, self.nicmod,
12676 _PrepareNicCreate, _PrepareNicMod, None)
12677 if len(nics) > constants.MAX_NICS:
12678 raise errors.OpPrereqError("Instance has too many network interfaces"
12679 " (%d), cannot add more" % constants.MAX_NICS,
12680 errors.ECODE_STATE)
12682 # Verify disk changes (operating on a copy)
12683 disks = instance.disks[:]
12684 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12685 if len(disks) > constants.MAX_DISKS:
12686 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12687 " more" % constants.MAX_DISKS,
12688 errors.ECODE_STATE)
12690 if self.op.offline is not None:
12691 if self.op.offline:
12692 msg = "can't change to offline"
12694 msg = "can't change to online"
12695 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12697 # Pre-compute NIC changes (necessary to use result in hooks)
12698 self._nic_chgdesc = []
12700 # Operate on copies as this is still in prereq
12701 nics = [nic.Copy() for nic in instance.nics]
12702 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12703 self._CreateNewNic, self._ApplyNicMods, None)
12704 self._new_nics = nics
12706 self._new_nics = None
12708 def _ConvertPlainToDrbd(self, feedback_fn):
12709 """Converts an instance from plain to drbd.
12712 feedback_fn("Converting template to drbd")
12713 instance = self.instance
12714 pnode = instance.primary_node
12715 snode = self.op.remote_node
12717 assert instance.disk_template == constants.DT_PLAIN
12719 # create a fake disk info for _GenerateDiskTemplate
12720 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12721 constants.IDISK_VG: d.logical_id[0]}
12722 for d in instance.disks]
12723 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12724 instance.name, pnode, [snode],
12725 disk_info, None, None, 0, feedback_fn,
12727 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12729 info = _GetInstanceInfoText(instance)
12730 feedback_fn("Creating additional volumes...")
12731 # first, create the missing data and meta devices
12732 for disk in anno_disks:
12733 # unfortunately this is... not too nice
12734 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12736 for child in disk.children:
12737 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12738 # at this stage, all new LVs have been created, we can rename the
12740 feedback_fn("Renaming original volumes...")
12741 rename_list = [(o, n.children[0].logical_id)
12742 for (o, n) in zip(instance.disks, new_disks)]
12743 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12744 result.Raise("Failed to rename original LVs")
12746 feedback_fn("Initializing DRBD devices...")
12747 # all child devices are in place, we can now create the DRBD devices
12748 for disk in anno_disks:
12749 for node in [pnode, snode]:
12750 f_create = node == pnode
12751 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12753 # at this point, the instance has been modified
12754 instance.disk_template = constants.DT_DRBD8
12755 instance.disks = new_disks
12756 self.cfg.Update(instance, feedback_fn)
12758 # Release node locks while waiting for sync
12759 _ReleaseLocks(self, locking.LEVEL_NODE)
12761 # disks are created, waiting for sync
12762 disk_abort = not _WaitForSync(self, instance,
12763 oneshot=not self.op.wait_for_sync)
12765 raise errors.OpExecError("There are some degraded disks for"
12766 " this instance, please cleanup manually")
12768 # Node resource locks will be released by caller
12770 def _ConvertDrbdToPlain(self, feedback_fn):
12771 """Converts an instance from drbd to plain.
12774 instance = self.instance
12776 assert len(instance.secondary_nodes) == 1
12777 assert instance.disk_template == constants.DT_DRBD8
12779 pnode = instance.primary_node
12780 snode = instance.secondary_nodes[0]
12781 feedback_fn("Converting template to plain")
12783 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12784 new_disks = [d.children[0] for d in instance.disks]
12786 # copy over size and mode
12787 for parent, child in zip(old_disks, new_disks):
12788 child.size = parent.size
12789 child.mode = parent.mode
12791 # this is a DRBD disk, return its port to the pool
12792 # NOTE: this must be done right before the call to cfg.Update!
12793 for disk in old_disks:
12794 tcp_port = disk.logical_id[2]
12795 self.cfg.AddTcpUdpPort(tcp_port)
12797 # update instance structure
12798 instance.disks = new_disks
12799 instance.disk_template = constants.DT_PLAIN
12800 self.cfg.Update(instance, feedback_fn)
12802 # Release locks in case removing disks takes a while
12803 _ReleaseLocks(self, locking.LEVEL_NODE)
12805 feedback_fn("Removing volumes on the secondary node...")
12806 for disk in old_disks:
12807 self.cfg.SetDiskID(disk, snode)
12808 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12810 self.LogWarning("Could not remove block device %s on node %s,"
12811 " continuing anyway: %s", disk.iv_name, snode, msg)
12813 feedback_fn("Removing unneeded volumes on the primary node...")
12814 for idx, disk in enumerate(old_disks):
12815 meta = disk.children[1]
12816 self.cfg.SetDiskID(meta, pnode)
12817 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12819 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12820 " continuing anyway: %s", idx, pnode, msg)
12822 def _CreateNewDisk(self, idx, params, _):
12823 """Creates a new disk.
12826 instance = self.instance
12829 if instance.disk_template in constants.DTS_FILEBASED:
12830 (file_driver, file_path) = instance.disks[0].logical_id
12831 file_path = os.path.dirname(file_path)
12833 file_driver = file_path = None
12836 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12837 instance.primary_node, instance.secondary_nodes,
12838 [params], file_path, file_driver, idx,
12839 self.Log, self.diskparams)[0]
12841 info = _GetInstanceInfoText(instance)
12843 logging.info("Creating volume %s for instance %s",
12844 disk.iv_name, instance.name)
12845 # Note: this needs to be kept in sync with _CreateDisks
12847 for node in instance.all_nodes:
12848 f_create = (node == instance.primary_node)
12850 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12851 except errors.OpExecError, err:
12852 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12853 disk.iv_name, disk, node, err)
12856 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12860 def _ModifyDisk(idx, disk, params, _):
12861 """Modifies a disk.
12864 disk.mode = params[constants.IDISK_MODE]
12867 ("disk.mode/%d" % idx, disk.mode),
12870 def _RemoveDisk(self, idx, root, _):
12874 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12875 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12876 self.cfg.SetDiskID(disk, node)
12877 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12879 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12880 " continuing anyway", idx, node, msg)
12882 # if this is a DRBD disk, return its port to the pool
12883 if root.dev_type in constants.LDS_DRBD:
12884 self.cfg.AddTcpUdpPort(root.logical_id[2])
12887 def _CreateNewNic(idx, params, private):
12888 """Creates data structure for a new network interface.
12891 mac = params[constants.INIC_MAC]
12892 ip = params.get(constants.INIC_IP, None)
12893 nicparams = private.params
12895 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12897 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12898 (mac, ip, private.filled[constants.NIC_MODE],
12899 private.filled[constants.NIC_LINK])),
12903 def _ApplyNicMods(idx, nic, params, private):
12904 """Modifies a network interface.
12909 for key in [constants.INIC_MAC, constants.INIC_IP]:
12911 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12912 setattr(nic, key, params[key])
12915 nic.nicparams = private.params
12917 for (key, val) in params.items():
12918 changes.append(("nic.%s/%d" % (key, idx), val))
12922 def Exec(self, feedback_fn):
12923 """Modifies an instance.
12925 All parameters take effect only at the next restart of the instance.
12928 # Process here the warnings from CheckPrereq, as we don't have a
12929 # feedback_fn there.
12930 # TODO: Replace with self.LogWarning
12931 for warn in self.warn:
12932 feedback_fn("WARNING: %s" % warn)
12934 assert ((self.op.disk_template is None) ^
12935 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12936 "Not owning any node resource locks"
12939 instance = self.instance
12942 if self.op.runtime_mem:
12943 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12945 self.op.runtime_mem)
12946 rpcres.Raise("Cannot modify instance runtime memory")
12947 result.append(("runtime_memory", self.op.runtime_mem))
12949 # Apply disk changes
12950 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12951 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12952 _UpdateIvNames(0, instance.disks)
12954 if self.op.disk_template:
12956 check_nodes = set(instance.all_nodes)
12957 if self.op.remote_node:
12958 check_nodes.add(self.op.remote_node)
12959 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12960 owned = self.owned_locks(level)
12961 assert not (check_nodes - owned), \
12962 ("Not owning the correct locks, owning %r, expected at least %r" %
12963 (owned, check_nodes))
12965 r_shut = _ShutdownInstanceDisks(self, instance)
12967 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12968 " proceed with disk template conversion")
12969 mode = (instance.disk_template, self.op.disk_template)
12971 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12973 self.cfg.ReleaseDRBDMinors(instance.name)
12975 result.append(("disk_template", self.op.disk_template))
12977 assert instance.disk_template == self.op.disk_template, \
12978 ("Expected disk template '%s', found '%s'" %
12979 (self.op.disk_template, instance.disk_template))
12981 # Release node and resource locks if there are any (they might already have
12982 # been released during disk conversion)
12983 _ReleaseLocks(self, locking.LEVEL_NODE)
12984 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12986 # Apply NIC changes
12987 if self._new_nics is not None:
12988 instance.nics = self._new_nics
12989 result.extend(self._nic_chgdesc)
12992 if self.op.hvparams:
12993 instance.hvparams = self.hv_inst
12994 for key, val in self.op.hvparams.iteritems():
12995 result.append(("hv/%s" % key, val))
12998 if self.op.beparams:
12999 instance.beparams = self.be_inst
13000 for key, val in self.op.beparams.iteritems():
13001 result.append(("be/%s" % key, val))
13004 if self.op.os_name:
13005 instance.os = self.op.os_name
13008 if self.op.osparams:
13009 instance.osparams = self.os_inst
13010 for key, val in self.op.osparams.iteritems():
13011 result.append(("os/%s" % key, val))
13013 if self.op.offline is None:
13016 elif self.op.offline:
13017 # Mark instance as offline
13018 self.cfg.MarkInstanceOffline(instance.name)
13019 result.append(("admin_state", constants.ADMINST_OFFLINE))
13021 # Mark instance as online, but stopped
13022 self.cfg.MarkInstanceDown(instance.name)
13023 result.append(("admin_state", constants.ADMINST_DOWN))
13025 self.cfg.Update(instance, feedback_fn)
13027 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13028 self.owned_locks(locking.LEVEL_NODE)), \
13029 "All node locks should have been released by now"
13033 _DISK_CONVERSIONS = {
13034 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13035 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
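# Exec() above looks up the conversion helper by the (old, new) template
# pair, e.g. mode = (constants.DT_PLAIN, constants.DT_DRBD8) selects
# _ConvertPlainToDrbd; template pairs without an entry here are not
# convertible.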
13039 class LUInstanceChangeGroup(LogicalUnit):
13040 HPATH = "instance-change-group"
13041 HTYPE = constants.HTYPE_INSTANCE
13044 def ExpandNames(self):
13045 self.share_locks = _ShareAll()
13046 self.needed_locks = {
13047 locking.LEVEL_NODEGROUP: [],
13048 locking.LEVEL_NODE: [],
13051 self._ExpandAndLockInstance()
13053 if self.op.target_groups:
13054 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13055 self.op.target_groups)
13057 self.req_target_uuids = None
13059 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13061 def DeclareLocks(self, level):
13062 if level == locking.LEVEL_NODEGROUP:
13063 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13065 if self.req_target_uuids:
13066 lock_groups = set(self.req_target_uuids)
13068 # Lock all groups used by instance optimistically; this requires going
13069 # via the node before it's locked, requiring verification later on
13070 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13071 lock_groups.update(instance_groups)
13073 # No target groups, need to lock all of them
13074 lock_groups = locking.ALL_SET
13076 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13078 elif level == locking.LEVEL_NODE:
13079 if self.req_target_uuids:
13080 # Lock all nodes used by instances
13081 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13082 self._LockInstancesNodes()
13084 # Lock all nodes in all potential target groups
13085 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13086 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13087 member_nodes = [node_name
13088 for group in lock_groups
13089 for node_name in self.cfg.GetNodeGroup(group).members]
13090 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13092 # Lock all nodes as all groups are potential targets
13093 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13095 def CheckPrereq(self):
13096 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13097 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13098 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13100 assert (self.req_target_uuids is None or
13101 owned_groups.issuperset(self.req_target_uuids))
13102 assert owned_instances == set([self.op.instance_name])
13104 # Get instance information
13105 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13107 # Check if node groups for locked instance are still correct
13108 assert owned_nodes.issuperset(self.instance.all_nodes), \
13109 ("Instance %s's nodes changed while we kept the lock" %
13110 self.op.instance_name)
13112 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13115 if self.req_target_uuids:
13116 # User requested specific target groups
13117 self.target_uuids = frozenset(self.req_target_uuids)
13119 # All groups except those used by the instance are potential targets
13120 self.target_uuids = owned_groups - inst_groups
13122 conflicting_groups = self.target_uuids & inst_groups
13123 if conflicting_groups:
13124 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13125 " used by the instance '%s'" %
13126 (utils.CommaJoin(conflicting_groups),
13127 self.op.instance_name),
13128 errors.ECODE_INVAL)
13130 if not self.target_uuids:
13131 raise errors.OpPrereqError("There are no possible target groups",
13132 errors.ECODE_INVAL)
13134 def BuildHooksEnv(self):
13135 """Build hooks env.
13138 assert self.target_uuids
13141 "TARGET_GROUPS": " ".join(self.target_uuids),
13144 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13148 def BuildHooksNodes(self):
13149 """Build hooks nodes.
13152 mn = self.cfg.GetMasterNode()
13153 return ([mn], [mn])
13155 def Exec(self, feedback_fn):
13156 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13158 assert instances == [self.op.instance_name], "Instance not locked"
13160 req = iallocator.IAReqGroupChange(instances=instances,
13161 target_groups=list(self.target_uuids))
13162 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13164 ial.Run(self.op.iallocator)
13166 if not ial.success:
13167 raise errors.OpPrereqError("Can't compute solution for changing group of"
13168 " instance '%s' using iallocator '%s': %s" %
13169 (self.op.instance_name, self.op.iallocator,
13170 ial.info), errors.ECODE_NORES)
13172 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13174 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13175 " instance '%s'", len(jobs), self.op.instance_name)
13177 return ResultWithJobs(jobs)
13180 class LUBackupQuery(NoHooksLU):
13181 """Query the exports list
13186 def CheckArguments(self):
13187 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13188 ["node", "export"], self.op.use_locking)
13190 def ExpandNames(self):
13191 self.expq.ExpandNames(self)
13193 def DeclareLocks(self, level):
13194 self.expq.DeclareLocks(self, level)
13196 def Exec(self, feedback_fn):
13199 for (node, expname) in self.expq.OldStyleQuery(self):
13200 if expname is None:
13201 result[node] = False
13203 result.setdefault(node, []).append(expname)
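# The old-style result maps each queried node either to False (the export
# list could not be retrieved from that node) or to the list of export
# names found there, e.g. (hypothetical):
#   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}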
13208 class _ExportQuery(_QueryBase):
13209 FIELDS = query.EXPORT_FIELDS
13211 #: The node name is not a unique key for this query
13212 SORT_FIELD = "node"
13214 def ExpandNames(self, lu):
13215 lu.needed_locks = {}
13217 # The following variables interact with _QueryBase._GetNames
13219 self.wanted = _GetWantedNodes(lu, self.names)
13221 self.wanted = locking.ALL_SET
13223 self.do_locking = self.use_locking
13225 if self.do_locking:
13226 lu.share_locks = _ShareAll()
13227 lu.needed_locks = {
13228 locking.LEVEL_NODE: self.wanted,
13231 def DeclareLocks(self, lu, level):
13234 def _GetQueryData(self, lu):
13235 """Computes the list of nodes and their attributes.
13238 # Locking is not used
13240 assert not (compat.any(lu.glm.is_owned(level)
13241 for level in locking.LEVELS
13242 if level != locking.LEVEL_CLUSTER) or
13243 self.do_locking or self.use_locking)
13245 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13249 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13251 result.append((node, None))
13253 result.extend((node, expname) for expname in nres.payload)
13258 class LUBackupPrepare(NoHooksLU):
13259 """Prepares an instance for an export and returns useful information.
13264 def ExpandNames(self):
13265 self._ExpandAndLockInstance()
13267 def CheckPrereq(self):
13268 """Check prerequisites.
13271 instance_name = self.op.instance_name
13273 self.instance = self.cfg.GetInstanceInfo(instance_name)
13274 assert self.instance is not None, \
13275 "Cannot retrieve locked instance %s" % self.op.instance_name
13276 _CheckNodeOnline(self, self.instance.primary_node)
13278 self._cds = _GetClusterDomainSecret()
13280 def Exec(self, feedback_fn):
13281 """Prepares an instance for an export.
13284 instance = self.instance
13286 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13287 salt = utils.GenerateSecret(8)
13289 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13290 result = self.rpc.call_x509_cert_create(instance.primary_node,
13291 constants.RIE_CERT_VALIDITY)
13292 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13294 (name, cert_pem) = result.payload
13296 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13300 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13301 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13303 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13309 class LUBackupExport(LogicalUnit):
13310 """Export an instance to an image in the cluster.
13313 HPATH = "instance-export"
13314 HTYPE = constants.HTYPE_INSTANCE
13317 def CheckArguments(self):
13318 """Check the arguments.
13321 self.x509_key_name = self.op.x509_key_name
13322 self.dest_x509_ca_pem = self.op.destination_x509_ca
13324 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13325 if not self.x509_key_name:
13326 raise errors.OpPrereqError("Missing X509 key name for encryption",
13327 errors.ECODE_INVAL)
13329 if not self.dest_x509_ca_pem:
13330 raise errors.OpPrereqError("Missing destination X509 CA",
13331 errors.ECODE_INVAL)
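# A remote-mode export therefore has to carry both pieces of data, roughly
# (hypothetical sketch, values elided):
#   OpBackupExport(instance_name=..., mode=constants.EXPORT_MODE_REMOTE,
#                  x509_key_name=..., destination_x509_ca=...)
# while a local-mode export only needs a target node.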
13333 def ExpandNames(self):
13334 self._ExpandAndLockInstance()
13336 # Lock all nodes for local exports
13337 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13338 # FIXME: lock only instance primary and destination node
13340 # Sad but true, for now we have to lock all nodes, as we don't know where
13341 # the previous export might be, and in this LU we search for it and
13342 # remove it from its current node. In the future we could fix this by:
13343 # - making a tasklet to search (share-lock all), then create the
13344 # new one, then one to remove, after
13345 # - removing the removal operation altogether
13346 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13348 def DeclareLocks(self, level):
13349 """Last minute lock declaration."""
13350 # All nodes are locked anyway, so nothing to do here.
13352 def BuildHooksEnv(self):
13353 """Build hooks env.
13355 This will run on the master, primary node and target node.
13359 "EXPORT_MODE": self.op.mode,
13360 "EXPORT_NODE": self.op.target_node,
13361 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13362 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13363 # TODO: Generic function for boolean env variables
13364 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13367 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13371 def BuildHooksNodes(self):
13372 """Build hooks nodes.
13375 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13377 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13378 nl.append(self.op.target_node)
13382 def CheckPrereq(self):
13383 """Check prerequisites.
13385 This checks that the instance and node names are valid.
13388 instance_name = self.op.instance_name
13390 self.instance = self.cfg.GetInstanceInfo(instance_name)
13391 assert self.instance is not None, \
13392 "Cannot retrieve locked instance %s" % self.op.instance_name
13393 _CheckNodeOnline(self, self.instance.primary_node)
13395 if (self.op.remove_instance and
13396 self.instance.admin_state == constants.ADMINST_UP and
13397 not self.op.shutdown):
13398 raise errors.OpPrereqError("Cannot remove instance without shutting it"
13399 " down first", errors.ECODE_STATE)
13401 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13402 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13403 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13404 assert self.dst_node is not None
13406 _CheckNodeOnline(self, self.dst_node.name)
13407 _CheckNodeNotDrained(self, self.dst_node.name)
13410 self.dest_disk_info = None
13411 self.dest_x509_ca = None
13413 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13414 self.dst_node = None
13416 if len(self.op.target_node) != len(self.instance.disks):
13417 raise errors.OpPrereqError(("Received destination information for %s"
13418 " disks, but instance %s has %s disks") %
13419 (len(self.op.target_node), instance_name,
13420 len(self.instance.disks)),
13421 errors.ECODE_INVAL)
13423 cds = _GetClusterDomainSecret()
13425 # Check X509 key name
13427 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13428 except (TypeError, ValueError), err:
13429 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13430 errors.ECODE_INVAL)
13432 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13433 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13434 errors.ECODE_INVAL)
13436 # Load and verify CA
13438 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13439 except OpenSSL.crypto.Error, err:
13440 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13441 (err, ), errors.ECODE_INVAL)
13443 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13444 if errcode is not None:
13445 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13446 (msg, ), errors.ECODE_INVAL)
13448 self.dest_x509_ca = cert
13450 # Verify target information
13452 for idx, disk_data in enumerate(self.op.target_node):
13454 (host, port, magic) = \
13455 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13456 except errors.GenericError, err:
13457 raise errors.OpPrereqError("Target info for disk %s: %s" %
13458 (idx, err), errors.ECODE_INVAL)
13460 disk_info.append((host, port, magic))
13462 assert len(disk_info) == len(self.op.target_node)
13463 self.dest_disk_info = disk_info
13466 raise errors.ProgrammerError("Unhandled export mode %r" %
13469 # instance disk type verification
13470 # TODO: Implement export support for file-based disks
13471 for disk in self.instance.disks:
13472 if disk.dev_type == constants.LD_FILE:
13473 raise errors.OpPrereqError("Export not supported for instances with"
13474 " file-based disks", errors.ECODE_INVAL)
13476 def _CleanupExports(self, feedback_fn):
13477 """Removes exports of current instance from all other nodes.
13479 If an instance in a cluster with nodes A..D was exported to node C, its
13480 exports will be removed from the nodes A, B and D.
13483 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13485 nodelist = self.cfg.GetNodeList()
13486 nodelist.remove(self.dst_node.name)
13488 # on one-node clusters nodelist will be empty after the removal
13489 # if we proceed the backup would be removed because OpBackupQuery
13490 # substitutes an empty list with the full cluster node list.
13491 iname = self.instance.name
13493 feedback_fn("Removing old exports for instance %s" % iname)
13494 exportlist = self.rpc.call_export_list(nodelist)
13495 for node in exportlist:
13496 if exportlist[node].fail_msg:
13498 if iname in exportlist[node].payload:
13499 msg = self.rpc.call_export_remove(node, iname).fail_msg
13501 self.LogWarning("Could not remove older export for instance %s"
13502 " on node %s: %s", iname, node, msg)
13504 def Exec(self, feedback_fn):
13505 """Export an instance to an image in the cluster.
13508 assert self.op.mode in constants.EXPORT_MODES
13510 instance = self.instance
13511 src_node = instance.primary_node
13513 if self.op.shutdown:
13514 # shutdown the instance, but not the disks
13515 feedback_fn("Shutting down instance %s" % instance.name)
13516 result = self.rpc.call_instance_shutdown(src_node, instance,
13517 self.op.shutdown_timeout)
13518 # TODO: Maybe ignore failures if ignore_remove_failures is set
13519 result.Raise("Could not shutdown instance %s on"
13520 " node %s" % (instance.name, src_node))
13522 # set the disks ID correctly since call_instance_start needs the
13523 # correct drbd minor to create the symlinks
13524 for disk in instance.disks:
13525 self.cfg.SetDiskID(disk, src_node)
13527 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13530 # Activate the instance disks if we're exporting a stopped instance
13531 feedback_fn("Activating disks for %s" % instance.name)
13532 _StartInstanceDisks(self, instance, None)
13535 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13538 helper.CreateSnapshots()
13540 if (self.op.shutdown and
13541 instance.admin_state == constants.ADMINST_UP and
13542 not self.op.remove_instance):
13543 assert not activate_disks
13544 feedback_fn("Starting instance %s" % instance.name)
13545 result = self.rpc.call_instance_start(src_node,
13546 (instance, None, None), False)
13547 msg = result.fail_msg
13549 feedback_fn("Failed to start instance: %s" % msg)
13550 _ShutdownInstanceDisks(self, instance)
13551 raise errors.OpExecError("Could not start instance: %s" % msg)
13553 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13554 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13555 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13556 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13557 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13559 (key_name, _, _) = self.x509_key_name
13562 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13565 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13566 key_name, dest_ca_pem,
13571 # Check for backwards compatibility
13572 assert len(dresults) == len(instance.disks)
13573 assert compat.all(isinstance(i, bool) for i in dresults), \
13574 "Not all results are boolean: %r" % dresults
13578 feedback_fn("Deactivating disks for %s" % instance.name)
13579 _ShutdownInstanceDisks(self, instance)
13581 if not (compat.all(dresults) and fin_resu):
13584 failures.append("export finalization")
13585 if not compat.all(dresults):
13586 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13588 failures.append("disk export: disk(s) %s" % fdsk)
13590 raise errors.OpExecError("Export failed, errors in %s" %
13591 utils.CommaJoin(failures))
13593 # At this point, the export was successful, we can cleanup/finish
13595 # Remove instance if requested
13596 if self.op.remove_instance:
13597 feedback_fn("Removing instance %s" % instance.name)
13598 _RemoveInstance(self, feedback_fn, instance,
13599 self.op.ignore_remove_failures)
13601 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13602 self._CleanupExports(feedback_fn)
13604 return fin_resu, dresults
13607 class LUBackupRemove(NoHooksLU):
13608 """Remove exports related to the named instance.
13613 def ExpandNames(self):
13614 self.needed_locks = {}
13615 # We need all nodes to be locked in order for RemoveExport to work, but we
13616 # don't need to lock the instance itself, as nothing will happen to it (and
13617 # we can remove exports also for a removed instance)
13618 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13620 def Exec(self, feedback_fn):
13621 """Remove any export.
13624 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13625 # If the instance was not found we'll try with the name that was passed in.
13626 # This will only work if it was an FQDN, though.
13628 if not instance_name:
13630 instance_name = self.op.instance_name
13632 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13633 exportlist = self.rpc.call_export_list(locked_nodes)
13635 for node in exportlist:
13636 msg = exportlist[node].fail_msg
13638 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13640 if instance_name in exportlist[node].payload:
13642 result = self.rpc.call_export_remove(node, instance_name)
13643 msg = result.fail_msg
13645 logging.error("Could not remove export for instance %s"
13646 " on node %s: %s", instance_name, node, msg)
13648 if fqdn_warn and not found:
13649 feedback_fn("Export not found. If trying to remove an export belonging"
13650 " to a deleted instance please use its Fully Qualified"
13654 class LUGroupAdd(LogicalUnit):
13655 """Logical unit for creating node groups.
13658 HPATH = "group-add"
13659 HTYPE = constants.HTYPE_GROUP
13662 def ExpandNames(self):
13663 # We need the new group's UUID here so that we can create and acquire the
13664 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13665 # that it should not check whether the UUID exists in the configuration.
13666 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13667 self.needed_locks = {}
13668 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13670 def CheckPrereq(self):
13671 """Check prerequisites.
13673 This checks that the given group name is not an existing node group
13678 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13679 except errors.OpPrereqError:
13682 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13683 " node group (UUID: %s)" %
13684 (self.op.group_name, existing_uuid),
13685 errors.ECODE_EXISTS)
13687 if self.op.ndparams:
13688 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13690 if self.op.hv_state:
13691 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13693 self.new_hv_state = None
13695 if self.op.disk_state:
13696 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13698 self.new_disk_state = None
13700 if self.op.diskparams:
13701 for templ in constants.DISK_TEMPLATES:
13702 if templ in self.op.diskparams:
13703 utils.ForceDictType(self.op.diskparams[templ],
13704 constants.DISK_DT_TYPES)
13705 self.new_diskparams = self.op.diskparams
13707 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13708 except errors.OpPrereqError, err:
13709 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
13710 errors.ECODE_INVAL)
13712 self.new_diskparams = {}
13714 if self.op.ipolicy:
13715 cluster = self.cfg.GetClusterInfo()
13716 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13718 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13719 except errors.ConfigurationError, err:
13720 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13721 errors.ECODE_INVAL)
13723 def BuildHooksEnv(self):
13724 """Build hooks env.
13728 "GROUP_NAME": self.op.group_name,
13731 def BuildHooksNodes(self):
13732 """Build hooks nodes.
13735 mn = self.cfg.GetMasterNode()
13736 return ([mn], [mn])
13738 def Exec(self, feedback_fn):
13739 """Add the node group to the cluster.
13742 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13743 uuid=self.group_uuid,
13744 alloc_policy=self.op.alloc_policy,
13745 ndparams=self.op.ndparams,
13746 diskparams=self.new_diskparams,
13747 ipolicy=self.op.ipolicy,
13748 hv_state_static=self.new_hv_state,
13749 disk_state_static=self.new_disk_state)
13751 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13752 del self.remove_locks[locking.LEVEL_NODEGROUP]
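# A minimal invocation only needs the new group's name, e.g. (hypothetical):
#   OpGroupAdd(group_name="rack2",
#              alloc_policy=constants.ALLOC_POLICY_PREFERRED)
# the remaining parameters (ndparams, diskparams, ipolicy, hv_state,
# disk_state) default to empty/None and are normalized by CheckPrereq above.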
13755 class LUGroupAssignNodes(NoHooksLU):
13756 """Logical unit for assigning nodes to groups.
13761 def ExpandNames(self):
13762 # These raise errors.OpPrereqError on their own:
13763 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13764 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13766 # We want to lock all the affected nodes and groups. We have readily
13767 # available the list of nodes, and the *destination* group. To gather the
13768 # list of "source" groups, we need to fetch node information later on.
13769 self.needed_locks = {
13770 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13771 locking.LEVEL_NODE: self.op.nodes,
13774 def DeclareLocks(self, level):
13775 if level == locking.LEVEL_NODEGROUP:
13776 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13778 # Try to get all affected nodes' groups without having the group or node
13779 # lock yet. Needs verification later in the code flow.
13780 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13782 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13784 def CheckPrereq(self):
13785 """Check prerequisites.
13788 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13789 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13790 frozenset(self.op.nodes))
13792 expected_locks = (set([self.group_uuid]) |
13793 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13794 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13795 if actual_locks != expected_locks:
13796 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13797 " current groups are '%s', used to be '%s'" %
13798 (utils.CommaJoin(expected_locks),
13799 utils.CommaJoin(actual_locks)))
13801 self.node_data = self.cfg.GetAllNodesInfo()
13802 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13803 instance_data = self.cfg.GetAllInstancesInfo()
13805 if self.group is None:
13806 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13807 (self.op.group_name, self.group_uuid))
13809 (new_splits, previous_splits) = \
13810 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13811 for node in self.op.nodes],
13812 self.node_data, instance_data)
13815 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13817 if not self.op.force:
13818 raise errors.OpExecError("The following instances get split by this"
13819 " change and --force was not given: %s" %
13822 self.LogWarning("This operation will split the following instances: %s",
13825 if previous_splits:
13826 self.LogWarning("In addition, these already-split instances continue"
13827 " to be split across groups: %s",
13828 utils.CommaJoin(utils.NiceSort(previous_splits)))
13830 def Exec(self, feedback_fn):
13831 """Assign nodes to a new group.
13834 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13836 self.cfg.AssignGroupNodes(mods)
13839 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13840 """Check for split instances after a node assignment.
13842 This method considers a series of node assignments as an atomic operation,
13843 and returns information about split instances after applying the set of
13846 In particular, it returns information about newly split instances, and
13847 instances that were already split, and remain so after the change.
13849 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13852 @type changes: list of (node_name, new_group_uuid) pairs.
13853 @param changes: list of node assignments to consider.
13854 @param node_data: a dict with data for all nodes
13855 @param instance_data: a dict with all instances to consider
13856 @rtype: a two-tuple
13857 @return: a list of instances that were previously okay and end up split as a
13858 consequence of this change, and a list of instances that were previously
13859 split and this change does not fix.
13862 changed_nodes = dict((node, group) for node, group in changes
13863 if node_data[node].group != group)
13865 all_split_instances = set()
13866 previously_split_instances = set()
13868 def InstanceNodes(instance):
13869 return [instance.primary_node] + list(instance.secondary_nodes)
13871 for inst in instance_data.values():
13872 if inst.disk_template not in constants.DTS_INT_MIRROR:
13875 instance_nodes = InstanceNodes(inst)
13877 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13878 previously_split_instances.add(inst.name)
13880 if len(set(changed_nodes.get(node, node_data[node].group)
13881 for node in instance_nodes)) > 1:
13882 all_split_instances.add(inst.name)
13884 return (list(all_split_instances - previously_split_instances),
13885 list(previously_split_instances & all_split_instances))
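# Worked example (hypothetical): nodes n1/n2 in group g1, n3 in group g2, and
# a DRBD instance on (n1, n2). Moving n2 to g2 makes the instance newly
# split (primary and secondary now in different groups); an instance already
# spanning g1 and g2 before the change would instead be reported as
# previously split.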
13888 class _GroupQuery(_QueryBase):
13889 FIELDS = query.GROUP_FIELDS
13891 def ExpandNames(self, lu):
13892 lu.needed_locks = {}
13894 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13895 self._cluster = lu.cfg.GetClusterInfo()
13896 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13899 self.wanted = [name_to_uuid[name]
13900 for name in utils.NiceSort(name_to_uuid.keys())]
13902 # Accept names as either group names or UUIDs.
13905 all_uuid = frozenset(self._all_groups.keys())
13907 for name in self.names:
13908 if name in all_uuid:
13909 self.wanted.append(name)
13910 elif name in name_to_uuid:
13911 self.wanted.append(name_to_uuid[name])
13913 missing.append(name)
13916 raise errors.OpPrereqError("Some groups do not exist: %s" %
13917 utils.CommaJoin(missing),
13918 errors.ECODE_NOENT)
13920 def DeclareLocks(self, lu, level):
13923 def _GetQueryData(self, lu):
13924 """Computes the list of node groups and their attributes.
13927 do_nodes = query.GQ_NODE in self.requested_data
13928 do_instances = query.GQ_INST in self.requested_data
13930 group_to_nodes = None
13931 group_to_instances = None
13933 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13934 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13935 # latter GetAllInstancesInfo() is not enough, for we have to go through
13936 # instance->node. Hence, we will need to process nodes even if we only need
13937 # instance information.
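# The resulting maps are keyed by group UUID, e.g. (hypothetical):
#   group_to_nodes     = {"uuid-g1": ["node1", "node2"], "uuid-g2": []}
#   group_to_instances = {"uuid-g1": ["inst1.example.com"], "uuid-g2": []}
# where an instance is attributed to the group of its primary node.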
13938 if do_nodes or do_instances:
13939 all_nodes = lu.cfg.GetAllNodesInfo()
13940 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13943 for node in all_nodes.values():
13944 if node.group in group_to_nodes:
13945 group_to_nodes[node.group].append(node.name)
13946 node_to_group[node.name] = node.group
13949 all_instances = lu.cfg.GetAllInstancesInfo()
13950 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13952 for instance in all_instances.values():
13953 node = instance.primary_node
13954 if node in node_to_group:
13955 group_to_instances[node_to_group[node]].append(instance.name)
13958 # Do not pass on node information if it was not requested.
13959 group_to_nodes = None
13961 return query.GroupQueryData(self._cluster,
13962 [self._all_groups[uuid]
13963 for uuid in self.wanted],
13964 group_to_nodes, group_to_instances,
13965 query.GQ_DISKPARAMS in self.requested_data)
13968 class LUGroupQuery(NoHooksLU):
13969 """Logical unit for querying node groups.
13974 def CheckArguments(self):
13975 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13976 self.op.output_fields, False)
13978 def ExpandNames(self):
13979 self.gq.ExpandNames(self)
13981 def DeclareLocks(self, level):
13982 self.gq.DeclareLocks(self, level)
13984 def Exec(self, feedback_fn):
13985 return self.gq.OldStyleQuery(self)
13988 class LUGroupSetParams(LogicalUnit):
13989 """Modifies the parameters of a node group.
13992 HPATH = "group-modify"
13993 HTYPE = constants.HTYPE_GROUP
13996 def CheckArguments(self):
13999 self.op.diskparams,
14000 self.op.alloc_policy,
14002 self.op.disk_state,
14006 if all_changes.count(None) == len(all_changes):
14007 raise errors.OpPrereqError("Please pass at least one modification",
14008 errors.ECODE_INVAL)
14010 def ExpandNames(self):
14011 # This raises errors.OpPrereqError on its own:
14012 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14014 self.needed_locks = {
14015 locking.LEVEL_INSTANCE: [],
14016 locking.LEVEL_NODEGROUP: [self.group_uuid],
14019 self.share_locks[locking.LEVEL_INSTANCE] = 1
14021 def DeclareLocks(self, level):
14022 if level == locking.LEVEL_INSTANCE:
14023 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14025 # Lock instances optimistically, needs verification once group lock has
14027 self.needed_locks[locking.LEVEL_INSTANCE] = \
14028 self.cfg.GetNodeGroupInstances(self.group_uuid)
14031 def _UpdateAndVerifyDiskParams(old, new):
14032 """Updates and verifies disk parameters.
14035 new_params = _GetUpdatedParams(old, new)
14036 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14037 return new_params
14039 def CheckPrereq(self):
14040 """Check prerequisites.
14043 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14045 # Check if locked instances are still correct
14046 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14048 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14049 cluster = self.cfg.GetClusterInfo()
14051 if self.group is None:
14052 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14053 (self.op.group_name, self.group_uuid))
14055 if self.op.ndparams:
14056 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14057 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14058 self.new_ndparams = new_ndparams
14060 if self.op.diskparams:
14061 diskparams = self.group.diskparams
14062 uavdp = self._UpdateAndVerifyDiskParams
14063 # For each disktemplate subdict update and verify the values
14064 new_diskparams = dict((dt,
14065 uavdp(diskparams.get(dt, {}),
14066 self.op.diskparams[dt]))
14067 for dt in constants.DISK_TEMPLATES
14068 if dt in self.op.diskparams)
14069 # Now that all updated subdicts of diskparams are ready, let's merge them
14070 # into the actual dict
14071 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
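# Example of the merge (hypothetical values): if the group's diskparams map
# constants.DT_DRBD8 to {"metavg": "xenvg"} and self.op.diskparams map it to
# {"resync-rate": 61440}, the updated subdict becomes
# {"metavg": "xenvg", "resync-rate": 61440}; FillDict leaves the subdicts of
# all other disk templates untouched.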
14073 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14074 except errors.OpPrereqError, err:
14075 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14076 errors.ECODE_INVAL)
14078 if self.op.hv_state:
14079 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14080 self.group.hv_state_static)
14082 if self.op.disk_state:
14083 self.new_disk_state = \
14084 _MergeAndVerifyDiskState(self.op.disk_state,
14085 self.group.disk_state_static)
14087 if self.op.ipolicy:
14088 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14092 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14093 inst_filter = lambda inst: inst.name in owned_instances
14094 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14095 gmi = ganeti.masterd.instance
14097 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14099 new_ipolicy, instances)
14102 self.LogWarning("After the ipolicy change the following instances"
14103 " violate them: %s",
14104 utils.CommaJoin(violations))
14106 def BuildHooksEnv(self):
14107 """Build hooks env.
14111 "GROUP_NAME": self.op.group_name,
14112 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14115 def BuildHooksNodes(self):
14116 """Build hooks nodes.
14119 mn = self.cfg.GetMasterNode()
14120 return ([mn], [mn])
14122 def Exec(self, feedback_fn):
14123 """Modifies the node group.
14128 if self.op.ndparams:
14129 self.group.ndparams = self.new_ndparams
14130 result.append(("ndparams", str(self.group.ndparams)))
14132 if self.op.diskparams:
14133 self.group.diskparams = self.new_diskparams
14134 result.append(("diskparams", str(self.group.diskparams)))
14136 if self.op.alloc_policy:
14137 self.group.alloc_policy = self.op.alloc_policy
14139 if self.op.hv_state:
14140 self.group.hv_state_static = self.new_hv_state
14142 if self.op.disk_state:
14143 self.group.disk_state_static = self.new_disk_state
14145 if self.op.ipolicy:
14146 self.group.ipolicy = self.new_ipolicy
14148 self.cfg.Update(self.group, feedback_fn)
14152 class LUGroupRemove(LogicalUnit):
14153 HPATH = "group-remove"
14154 HTYPE = constants.HTYPE_GROUP
14157 def ExpandNames(self):
14158 # This raises errors.OpPrereqError on its own:
14159 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14160 self.needed_locks = {
14161 locking.LEVEL_NODEGROUP: [self.group_uuid],
14164 def CheckPrereq(self):
14165 """Check prerequisites.
14167 This checks that the given group name exists as a node group, that it is
14168 empty (i.e., contains no nodes), and that it is not the last group of the
14172 # Verify that the group is empty.
14173 group_nodes = [node.name
14174 for node in self.cfg.GetAllNodesInfo().values()
14175 if node.group == self.group_uuid]
14178 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14180 (self.op.group_name,
14181 utils.CommaJoin(utils.NiceSort(group_nodes))),
14182 errors.ECODE_STATE)
14184 # Verify the cluster would not be left group-less.
14185 if len(self.cfg.GetNodeGroupList()) == 1:
14186 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14187 " removed" % self.op.group_name,
14188 errors.ECODE_STATE)
14190 def BuildHooksEnv(self):
14191 """Build hooks env.
14195 "GROUP_NAME": self.op.group_name,
14198 def BuildHooksNodes(self):
14199 """Build hooks nodes.
14202 mn = self.cfg.GetMasterNode()
14203 return ([mn], [mn])
14205 def Exec(self, feedback_fn):
14206 """Remove the node group.
14210 self.cfg.RemoveNodeGroup(self.group_uuid)
14211 except errors.ConfigurationError:
14212 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14213 (self.op.group_name, self.group_uuid))
14215 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14218 class LUGroupRename(LogicalUnit):
14219 HPATH = "group-rename"
14220 HTYPE = constants.HTYPE_GROUP
14223 def ExpandNames(self):
14224 # This raises errors.OpPrereqError on its own:
14225 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14227 self.needed_locks = {
14228 locking.LEVEL_NODEGROUP: [self.group_uuid],
14231 def CheckPrereq(self):
14232 """Check prerequisites.
14234 Ensures requested new name is not yet used.
14238 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14239 except errors.OpPrereqError:
14242 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14243 " node group (UUID: %s)" %
14244 (self.op.new_name, new_name_uuid),
14245 errors.ECODE_EXISTS)
14247 def BuildHooksEnv(self):
14248 """Build hooks env.
14252 "OLD_NAME": self.op.group_name,
14253 "NEW_NAME": self.op.new_name,
14256 def BuildHooksNodes(self):
14257 """Build hooks nodes.
14260 mn = self.cfg.GetMasterNode()
14262 all_nodes = self.cfg.GetAllNodesInfo()
14263 all_nodes.pop(mn, None)
14266 run_nodes.extend(node.name for node in all_nodes.values()
14267 if node.group == self.group_uuid)
14269 return (run_nodes, run_nodes)
14271 def Exec(self, feedback_fn):
14272 """Rename the node group.
14275 group = self.cfg.GetNodeGroup(self.group_uuid)
14278 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14279 (self.op.group_name, self.group_uuid))
14281 group.name = self.op.new_name
14282 self.cfg.Update(group, feedback_fn)
14284 return self.op.new_name
14287 class LUGroupEvacuate(LogicalUnit):
14288 HPATH = "group-evacuate"
14289 HTYPE = constants.HTYPE_GROUP
14292 def ExpandNames(self):
14293 # This raises errors.OpPrereqError on its own:
14294 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14296 if self.op.target_groups:
14297 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14298 self.op.target_groups)
14300 self.req_target_uuids = []
14302 if self.group_uuid in self.req_target_uuids:
14303 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14304 " as a target group (targets are %s)" %
14306 utils.CommaJoin(self.req_target_uuids)),
14307 errors.ECODE_INVAL)
14309 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14311 self.share_locks = _ShareAll()
14312 self.needed_locks = {
14313 locking.LEVEL_INSTANCE: [],
14314 locking.LEVEL_NODEGROUP: [],
14315 locking.LEVEL_NODE: [],
14318 def DeclareLocks(self, level):
14319 if level == locking.LEVEL_INSTANCE:
14320 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14322 # Lock instances optimistically, needs verification once node and group
14323 # locks have been acquired
14324 self.needed_locks[locking.LEVEL_INSTANCE] = \
14325 self.cfg.GetNodeGroupInstances(self.group_uuid)
14327 elif level == locking.LEVEL_NODEGROUP:
14328 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14330 if self.req_target_uuids:
14331 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14333 # Lock all groups used by instances optimistically; this requires going
14334 # via the node before it's locked, requiring verification later on
14335 lock_groups.update(group_uuid
14336 for instance_name in
14337 self.owned_locks(locking.LEVEL_INSTANCE)
14339 self.cfg.GetInstanceNodeGroups(instance_name))
14341 # No target groups, need to lock all of them
14342 lock_groups = locking.ALL_SET
14344 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14346 elif level == locking.LEVEL_NODE:
14347 # This will only lock the nodes in the group to be evacuated which
14348 # contain actual instances
14349 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14350 self._LockInstancesNodes()
14352 # Lock all nodes in group to be evacuated and target groups
14353 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14354 assert self.group_uuid in owned_groups
14355 member_nodes = [node_name
14356 for group in owned_groups
14357 for node_name in self.cfg.GetNodeGroup(group).members]
14358 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14360 def CheckPrereq(self):
14361 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14362 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14363 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14365 assert owned_groups.issuperset(self.req_target_uuids)
14366 assert self.group_uuid in owned_groups
14368 # Check if locked instances are still correct
14369 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14371 # Get instance information
14372 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14374 # Check if node groups for locked instances are still correct
14375 _CheckInstancesNodeGroups(self.cfg, self.instances,
14376 owned_groups, owned_nodes, self.group_uuid)
14378 if self.req_target_uuids:
14379 # User requested specific target groups
14380 self.target_uuids = self.req_target_uuids
14382 # All groups except the one to be evacuated are potential targets
14383 self.target_uuids = [group_uuid for group_uuid in owned_groups
14384 if group_uuid != self.group_uuid]
14386 if not self.target_uuids:
14387 raise errors.OpPrereqError("There are no possible target groups",
14388 errors.ECODE_INVAL)
14390 def BuildHooksEnv(self):
14391 """Build hooks env.
14395 "GROUP_NAME": self.op.group_name,
14396 "TARGET_GROUPS": " ".join(self.target_uuids),
14399 def BuildHooksNodes(self):
14400 """Build hooks nodes.
14403 mn = self.cfg.GetMasterNode()
14405 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14407 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14409 return (run_nodes, run_nodes)
14411 def Exec(self, feedback_fn):
14412 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14414 assert self.group_uuid not in self.target_uuids
14416 req = iallocator.IAReqGroupChange(instances=instances,
14417 target_groups=self.target_uuids)
14418 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14420 ial.Run(self.op.iallocator)
14422 if not ial.success:
14423 raise errors.OpPrereqError("Can't compute group evacuation using"
14424 " iallocator '%s': %s" %
14425 (self.op.iallocator, ial.info),
14426 errors.ECODE_NORES)
14428 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14430 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14431 len(jobs), self.op.group_name)
14433 return ResultWithJobs(jobs)
14436 class TagsLU(NoHooksLU): # pylint: disable=W0223
14437 """Generic tags LU.
14439 This is an abstract class which is the parent of all the other tags LUs.
14442 def ExpandNames(self):
14443 self.group_uuid = None
14444 self.needed_locks = {}
14446 if self.op.kind == constants.TAG_NODE:
14447 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14448 lock_level = locking.LEVEL_NODE
14449 lock_name = self.op.name
14450 elif self.op.kind == constants.TAG_INSTANCE:
14451 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14452 lock_level = locking.LEVEL_INSTANCE
14453 lock_name = self.op.name
14454 elif self.op.kind == constants.TAG_NODEGROUP:
14455 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14456 lock_level = locking.LEVEL_NODEGROUP
14457 lock_name = self.group_uuid
14462 if lock_level and getattr(self.op, "use_locking", True):
14463 self.needed_locks[lock_level] = lock_name
14465 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14466 # not possible to acquire the BGL based on opcode parameters)
14468 def CheckPrereq(self):
14469 """Check prerequisites.
14472 if self.op.kind == constants.TAG_CLUSTER:
14473 self.target = self.cfg.GetClusterInfo()
14474 elif self.op.kind == constants.TAG_NODE:
14475 self.target = self.cfg.GetNodeInfo(self.op.name)
14476 elif self.op.kind == constants.TAG_INSTANCE:
14477 self.target = self.cfg.GetInstanceInfo(self.op.name)
14478 elif self.op.kind == constants.TAG_NODEGROUP:
14479 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14481 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14482 str(self.op.kind), errors.ECODE_INVAL)
14485 class LUTagsGet(TagsLU):
14486 """Returns the tags of a given object.
14491 def ExpandNames(self):
14492 TagsLU.ExpandNames(self)
14494 # Share locks as this is only a read operation
14495 self.share_locks = _ShareAll()
14497 def Exec(self, feedback_fn):
14498 """Returns the tag list.
14501 return list(self.target.GetTags())
14504 class LUTagsSearch(NoHooksLU):
14505 """Searches the tags for a given pattern.
14510 def ExpandNames(self):
14511 self.needed_locks = {}
14513 def CheckPrereq(self):
14514 """Check prerequisites.
14516 This checks the pattern passed for validity by compiling it.
14520 self.re = re.compile(self.op.pattern)
14521 except re.error, err:
14522 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14523 (self.op.pattern, err), errors.ECODE_INVAL)
14525 def Exec(self, feedback_fn):
14526 """Returns the tag list.
14530 tgts = [("/cluster", cfg.GetClusterInfo())]
14531 ilist = cfg.GetAllInstancesInfo().values()
14532 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14533 nlist = cfg.GetAllNodesInfo().values()
14534 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14535 tgts.extend(("/nodegroup/%s" % n.name, n)
14536 for n in cfg.GetAllNodeGroupsInfo().values())
14538 for path, target in tgts:
14539 for tag in target.GetTags():
14540 if self.re.search(tag):
14541 results.append((path, tag))
14542 return results
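# Illustrative sketch (assumed tag names): with pattern "^prod", the result
# could look like
#
#   [("/cluster", "production"),
#    ("/instances/inst1.example.com", "prod-db")]
#
# i.e. one (path, tag) pair per matching tag across cluster, instances, nodes
# and node groups.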
14545 class LUTagsSet(TagsLU):
14546 """Sets a tag on a given object.
14548 """
14549 REQ_BGL = False
14551 def CheckPrereq(self):
14552 """Check prerequisites.
14554 This checks the type and length of the tag name and value.
14556 """
14557 TagsLU.CheckPrereq(self)
14558 for tag in self.op.tags:
14559 objects.TaggableObject.ValidateTag(tag)
14561 def Exec(self, feedback_fn):
14562 """Sets the tag.
14564 """
14565 try:
14566 for tag in self.op.tags:
14567 self.target.AddTag(tag)
14568 except errors.TagError, err:
14569 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14570 self.cfg.Update(self.target, feedback_fn)
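# Illustrative sketch (assumption): this LU is what a command along the lines
# of "gnt-instance add-tags inst1.example.com production web" ends up in; each
# tag is validated by objects.TaggableObject.ValidateTag() in CheckPrereq and
# the configuration is written back exactly once after all tags were added.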
14573 class LUTagsDel(TagsLU):
14574 """Delete a list of tags from a given object.
14576 """
14577 REQ_BGL = False
14579 def CheckPrereq(self):
14580 """Check prerequisites.
14582 This checks that we have the given tag.
14584 """
14585 TagsLU.CheckPrereq(self)
14586 for tag in self.op.tags:
14587 objects.TaggableObject.ValidateTag(tag)
14588 del_tags = frozenset(self.op.tags)
14589 cur_tags = self.target.GetTags()
14591 diff_tags = del_tags - cur_tags
14592 if diff_tags:
14593 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14594 raise errors.OpPrereqError("Tag(s) %s not found" %
14595 (utils.CommaJoin(diff_names), ),
14596 errors.ECODE_NOENT)
14598 def Exec(self, feedback_fn):
14599 """Remove the tag from the object.
14601 """
14602 for tag in self.op.tags:
14603 self.target.RemoveTag(tag)
14604 self.cfg.Update(self.target, feedback_fn)
14607 class LUTestDelay(NoHooksLU):
14608 """Sleep for a specified amount of time.
14610 This LU sleeps on the master and/or nodes for a specified amount of
14611 time.
14613 """
14614 REQ_BGL = False
14616 def ExpandNames(self):
14617 """Expand names and set required locks.
14619 This expands the node list, if any.
14621 """
14622 self.needed_locks = {}
14623 if self.op.on_nodes:
14624 # _GetWantedNodes can be used here, but is not always appropriate to use
14625 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14626 # more information.
14627 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14628 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14630 def _TestDelay(self):
14631 """Do the actual sleep.
14633 """
14634 if self.op.on_master:
14635 if not utils.TestDelay(self.op.duration):
14636 raise errors.OpExecError("Error during master delay test")
14637 if self.op.on_nodes:
14638 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14639 for node, node_result in result.items():
14640 node_result.Raise("Failure during rpc call to node %s" % node)
14642 def Exec(self, feedback_fn):
14643 """Execute the test delay opcode, with the wanted repetitions.
14645 """
14646 if self.op.repeat == 0:
14647 self._TestDelay()
14648 else:
14649 top_value = self.op.repeat - 1
14650 for i in range(self.op.repeat):
14651 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14652 self._TestDelay()
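# Illustrative sketch (assumed parameters): with repeat=2, duration=5 and
# on_nodes=["node1.example.com"], the loop above logs
#
#   Test delay iteration 0/1
#   Test delay iteration 1/1
#
# and calls _TestDelay() once per iteration, i.e. the 5 second sleep happens
# on the master (if on_master is set) and via the test_delay RPC on node1.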
14655 class LUTestJqueue(NoHooksLU):
14656 """Utility LU to test some aspects of the job queue.
14658 """
14659 REQ_BGL = False
14661 # Must be lower than default timeout for WaitForJobChange to see whether it
14662 # notices changed jobs
14663 _CLIENT_CONNECT_TIMEOUT = 20.0
14664 _CLIENT_CONFIRM_TIMEOUT = 60.0
14666 @classmethod
14667 def _NotifyUsingSocket(cls, cb, errcls):
14668 """Opens a Unix socket and waits for another program to connect.
14670 @type cb: callable
14671 @param cb: Callback to send socket name to client
14672 @type errcls: class
14673 @param errcls: Exception class to use for errors
14675 """
14676 # Using a temporary directory as there's no easy way to create temporary
14677 # sockets without writing a custom loop around tempfile.mktemp and
14678 # socket.bind
14679 tmpdir = tempfile.mkdtemp()
14680 try:
14681 tmpsock = utils.PathJoin(tmpdir, "sock")
14683 logging.debug("Creating temporary socket at %s", tmpsock)
14684 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14685 try:
14686 sock.bind(tmpsock)
14687 sock.listen(1)
14689 # Send details to client
14690 cb(tmpsock)
14692 # Wait for client to connect before continuing
14693 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14694 try:
14695 (conn, _) = sock.accept()
14696 except socket.error, err:
14697 raise errcls("Client didn't connect in time (%s)" % err)
14698 finally:
14699 sock.close()
14700 finally:
14701 # Remove as soon as client is connected
14702 shutil.rmtree(tmpdir)
14704 # Wait for client to close
14705 try:
14706 try:
14707 # pylint: disable=E1101
14708 # Instance of '_socketobject' has no ... member
14709 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14710 conn.recv(1)
14711 except socket.error, err:
14712 raise errcls("Client failed to confirm notification (%s)" % err)
14713 finally:
14714 conn.close()
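# Illustrative client-side sketch (assumption, not part of this module): the
# process that receives the socket path via the callback is expected to do
# roughly
#
#   s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   s.connect(sockname)   # unblocks sock.accept() above
#   s.close()             # makes conn.recv(1) above return
#
# and to do so within _CLIENT_CONNECT_TIMEOUT/_CLIENT_CONFIRM_TIMEOUT, since
# otherwise the errcls exception is raised.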
14716 def _SendNotification(self, test, arg, sockname):
14717 """Sends a notification to the client.
14719 @type test: string
14720 @param test: Test name
14721 @param arg: Test argument (depends on test)
14722 @type sockname: string
14723 @param sockname: Socket path
14725 """
14726 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14728 def _Notify(self, prereq, test, arg):
14729 """Notifies the client of a test.
14731 @type prereq: bool
14732 @param prereq: Whether this is a prereq-phase test
14733 @type test: string
14734 @param test: Test name
14735 @param arg: Test argument (depends on test)
14737 """
14738 if prereq:
14739 errcls = errors.OpPrereqError
14740 else:
14741 errcls = errors.OpExecError
14743 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14744 test, arg),
14745 errcls)
14747 def CheckArguments(self):
14748 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14749 self.expandnames_calls = 0
14751 def ExpandNames(self):
14752 checkargs_calls = getattr(self, "checkargs_calls", 0)
14753 if checkargs_calls < 1:
14754 raise errors.ProgrammerError("CheckArguments was not called")
14756 self.expandnames_calls += 1
14758 if self.op.notify_waitlock:
14759 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14761 self.LogInfo("Expanding names")
14763 # Get lock on master node (just to get a lock, not for a particular reason)
14764 self.needed_locks = {
14765 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14766 }
14768 def Exec(self, feedback_fn):
14769 if self.expandnames_calls < 1:
14770 raise errors.ProgrammerError("ExpandNames was not called")
14772 if self.op.notify_exec:
14773 self._Notify(False, constants.JQT_EXEC, None)
14775 self.LogInfo("Executing")
14777 if self.op.log_messages:
14778 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14779 for idx, msg in enumerate(self.op.log_messages):
14780 self.LogInfo("Sending log message %s", idx + 1)
14781 feedback_fn(constants.JQT_MSGPREFIX + msg)
14782 # Report how many test messages have been sent
14783 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14785 if self.op.fail:
14786 raise errors.OpExecError("Opcode failure was requested")
14788 return True
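# Illustrative sketch (assumed parameters): a test driver submitting
# OpTestJqueue with notify_exec=True and log_messages=["a", "b"] is notified
# of JQT_EXEC and JQT_STARTMSG on the socket it is handed, then sees the two
# JQT_MSGPREFIX-prefixed feedback messages; with fail=True the opcode ends in
# the OpExecError above instead of returning True.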
14791 class LUTestAllocator(NoHooksLU):
14792 """Run allocator tests.
14794 This LU runs the allocator tests
14796 """
14797 def CheckPrereq(self):
14798 """Check prerequisites.
14800 This checks the opcode parameters depending on the direction and mode test.
14802 """
14803 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
14804 constants.IALLOCATOR_MODE_MULTI_ALLOC):
14805 for attr in ["memory", "disks", "disk_template",
14806 "os", "tags", "nics", "vcpus"]:
14807 if not hasattr(self.op, attr):
14808 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14809 attr, errors.ECODE_INVAL)
14810 iname = self.cfg.ExpandInstanceName(self.op.name)
14811 if iname is not None:
14812 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14813 iname, errors.ECODE_EXISTS)
14814 if not isinstance(self.op.nics, list):
14815 raise errors.OpPrereqError("Invalid parameter 'nics'",
14816 errors.ECODE_INVAL)
14817 if not isinstance(self.op.disks, list):
14818 raise errors.OpPrereqError("Invalid parameter 'disks'",
14819 errors.ECODE_INVAL)
14820 for row in self.op.disks:
14821 if (not isinstance(row, dict) or
14822 constants.IDISK_SIZE not in row or
14823 not isinstance(row[constants.IDISK_SIZE], int) or
14824 constants.IDISK_MODE not in row or
14825 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14826 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14827 " parameter", errors.ECODE_INVAL)
14828 if self.op.hypervisor is None:
14829 self.op.hypervisor = self.cfg.GetHypervisorType()
14830 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14831 fname = _ExpandInstanceName(self.cfg, self.op.name)
14832 self.op.name = fname
14833 self.relocate_from = \
14834 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14835 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14836 constants.IALLOCATOR_MODE_NODE_EVAC):
14837 if not self.op.instances:
14838 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14839 self.op.instances = _GetWantedInstances(self, self.op.instances)
14840 else:
14841 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14842 self.op.mode, errors.ECODE_INVAL)
14844 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14845 if self.op.allocator is None:
14846 raise errors.OpPrereqError("Missing allocator name",
14847 errors.ECODE_INVAL)
14848 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14849 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14850 self.op.direction, errors.ECODE_INVAL)
14852 def Exec(self, feedback_fn):
14853 """Run the allocator test.
14855 """
14856 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14857 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
14858 memory=self.op.memory,
14859 disks=self.op.disks,
14860 disk_template=self.op.disk_template,
14861 os=self.op.os,
14862 tags=self.op.tags,
14863 nics=self.op.nics,
14864 vcpus=self.op.vcpus,
14865 spindle_use=self.op.spindle_use,
14866 hypervisor=self.op.hypervisor)
14867 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14868 req = iallocator.IAReqRelocate(name=self.op.name,
14869 relocate_from=list(self.relocate_from))
14870 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14871 req = iallocator.IAReqGroupChange(instances=self.op.instances,
14872 target_groups=self.op.target_groups)
14873 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14874 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
14875 evac_mode=self.op.evac_mode)
14876 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
14877 disk_template = self.op.disk_template
14878 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
14879 memory=self.op.memory,
14880 disks=self.op.disks,
14881 disk_template=disk_template,
14882 os=self.op.os,
14883 tags=self.op.tags,
14884 nics=self.op.nics,
14885 vcpus=self.op.vcpus,
14886 spindle_use=self.op.spindle_use,
14887 hypervisor=self.op.hypervisor)
14888 for idx in range(self.op.count)]
14889 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
14890 else:
14891 raise errors.ProgrammerError("Uncaught mode %s in"
14892 " LUTestAllocator.Exec", self.op.mode)
14894 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14895 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14896 result = ial.in_text
14897 else:
14898 ial.Run(self.op.allocator, validate=False)
14899 result = ial.out_text
14901 return result
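# Illustrative sketch (assumption): with direction=IALLOCATOR_DIR_IN the LU
# only returns the JSON request text (ial.in_text) that would be handed to the
# allocator script, which is useful for inspecting e.g. an "allocate" request
# built from the memory/disks/nics parameters; with IALLOCATOR_DIR_OUT the
# named allocator is actually executed (without result validation) and its raw
# output text (ial.out_text) is returned.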
14903 #: Query type implementations
14904 _QUERY_IMPL = {
14905 constants.QR_CLUSTER: _ClusterQuery,
14906 constants.QR_INSTANCE: _InstanceQuery,
14907 constants.QR_NODE: _NodeQuery,
14908 constants.QR_GROUP: _GroupQuery,
14909 constants.QR_OS: _OsQuery,
14910 constants.QR_EXPORT: _ExportQuery,
14911 }
14913 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14916 def _GetQueryImplementation(name):
14917 """Returns the implementation for a query type.
14919 @param name: Query type, must be one of L{constants.QR_VIA_OP}
14921 """
14922 try:
14923 return _QUERY_IMPL[name]
14924 except KeyError:
14925 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14926 errors.ECODE_INVAL)
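# Illustrative sketch (assumed caller): the generic query LUs look up the
# implementation class by resource name, e.g.
#
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#
# and a name outside constants.QR_VIA_OP surfaces as the OpPrereqError above
# rather than as a bare KeyError.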