# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA

"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar

# C0302: since we have waaaay too many lines in this module
# Standard-library and third-party modules used further below in this module
import copy
import itertools
import logging

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti import vcluster
from ganeti import network
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode

  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    self.proc = processor
    self.cfg = context.cfg
    self.glm = context.glm
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner

    # Dictionaries used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
    self.remove_locks = {}

    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}

    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()
  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possible

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.
  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      self.needed_locks = {} # No, you can't leave it to the default value None
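      # A sketch, not part of the original examples: acquire all node locks,
      # but in shared rather than exclusive mode (see share_locks above)
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        }
      self.share_locks[locking.LEVEL_NODE] = 1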
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # self.needed_locks = {} # Exclusive LUs don't need locks.
    raise NotImplementedError
  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
  def Exec(self, feedback_fn):

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in

    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function

    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function

    raise NotImplementedError
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result

    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable=W0613,R0201
  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done

    if self.needed_locks is None:
      self.needed_locks = {}

    assert locking.LEVEL_INSTANCE not in self.needed_locks, \
      "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.
    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    raise AssertionError("BuildHooksNodes called for NoHooksLU")


  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq

  def __init__(self, lu):

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or

    raise NotImplementedError
  """Base for query utility classes.

  #: Attribute holding field definitions

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield=self.SORT_FIELD)
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

      names = lu.owned_locks(lock_level)

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order

    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


  """Returns a dict declaring all lock levels shared.

  return dict.fromkeys(locking.LEVELS, 1)
def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @return: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}

  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))
def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  if names == locking.ALL_SET:
    return locking.ALL_SET


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param nodes: list of node names or None for all nodes
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
  @return: the new parameter dictionary

  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      params_copy[key] = val
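
# Illustrative sketch, not part of the original module: how the merge
# semantics of _GetUpdatedParams work out in practice; the parameter names
# and values below are made up for the example.
def _ExampleGetUpdatedParams():
  old = {
    "kernel_path": "/boot/vmlinuz",
    "root_path": "/dev/xvda1",
    }
  update = {
    "root_path": constants.VALUE_DEFAULT,  # reset: the key is dropped
    "serial_console": True,                # new key: added
    }
  # Result: {"kernel_path": "/boot/vmlinuz", "serial_console": True}
  return _GetUpdatedParams(old, update)
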
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_default=use_default)
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
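
# Illustrative sketch, not part of the original module: merging per-node
# sub-dicts with a type check; the key names and values are made up.
def _ExampleUpdateAndVerifySubDict():
  base = {"node1": {"cpu_node": 1}, "node2": {"cpu_node": 2}}
  updates = {"node2": {"cpu_node": 4}, "node3": {"cpu_node": 8}}
  type_check = {"cpu_node": constants.VTYPE_INT}
  # Result: node1 kept as-is, node2 updated, node3 added; every merged
  # sub-dict is verified against type_check
  return _UpdateAndVerifySubDict(base, updates, type_check)
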
def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with that of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

    invalid_hvs = set(op_input) - constants.HYPER_TYPES
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
    if obj_input is None:
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with that of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                for key, value in op_input.items())
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
    should_release = lambda name: name not in keep
    should_release = None

  owned = lu.owned_locks(level)
    # Not owning any lock at this level, do nothing

    # Determine which locks to release
      if should_release(name):

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)

    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
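
# Illustrative sketch, not part of the original module: typical call
# patterns for _ReleaseLocks inside an LU; "lu" and "node_to_keep" are
# hypothetical parameters.
def _ExampleReleaseLocks(lu, node_to_keep):
  # Keep only the node lock named by node_to_keep, releasing all other
  # node locks owned by this LU
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[node_to_keep])
  # Release every instance lock still owned at this point
  _ReleaseLocks(lu, locking.LEVEL_INSTANCE)
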
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
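
# Illustrative sketch, not part of the original module: the returned dict is
# keyed by (node name, volume name) tuples, which allows a reverse lookup of
# the owning instance; the function and argument names here are hypothetical.
def _ExampleLookupDiskOwner(instances, node_name, vol_name):
  # Which instance owns logical volume vol_name on node_name?
  disk_map = _MapInstanceDisksToNodes(instances)
  return disk_map.get((node_name, vol_name))
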
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  hm = lu.proc.BuildHooksManager(lu)
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s",


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  delta = f.NonMatching(selected)
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of

  used_globals = constants.HVC_GLOBALS.intersection(params)
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
      lu.LogWarning("Primary node offline, ignoring check that instance"
def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  if value in [None, constants.VALUE_AUTO]:
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
      fqn = "%s/%s" % (name, qualifier)
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
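
# Illustrative sketch, not part of the original module: checking a single
# value against an ipolicy; the concrete policy limits are assumed here.
def _ExampleComputeMinMaxSpec(ipolicy):
  # With, say, a policy allowing 512..4096 MB of memory, a value within the
  # range makes the function return None, while a value outside it returns
  # a human-readable "not in range" message.
  return _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 8192)
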
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @param ipolicy: The ipolicy
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  assert disk_count == len(disk_sizes)

    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))
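
# Illustrative sketch, not part of the original module: verifying a
# hypothetical instance spec against an ipolicy; all numbers are made up.
def _ExampleComputeIPolicySpecViolation(ipolicy):
  # An empty result means the given specs fit the policy; otherwise the
  # human-readable violation messages are returned.
  return _ComputeIPolicySpecViolation(ipolicy, mem_size=1024, cpu_count=2,
                                      disk_count=2, nic_count=1,
                                      disk_sizes=[10240, 20480],
                                      spindle_use=2)
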
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyInstanceSpecViolation(
    ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  if current_group == target_group:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but

  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         network_type, mac_prefix, tags):
  """Builds network related env variables for hooks

  This builds the hook environment from individual variables.

  @param name: the name of the network
  @type subnet: string
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type network_type: string
  @param network_type: the type of the network
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @param tags: the tags of the network

    env["NETWORK_NAME"] = name
    env["NETWORK_SUBNET"] = subnet
    env["NETWORK_GATEWAY"] = gateway
    env["NETWORK_SUBNET6"] = network6
    env["NETWORK_GATEWAY6"] = gateway6
    env["NETWORK_MAC_PREFIX"] = mac_prefix
    env["NETWORK_TYPE"] = network_type
    env["NETWORK_TAGS"] = " ".join(tags)
def _BuildNetworkHookEnvByObject(net):
  """Builds network related env variables for hooks

  @type net: L{objects.Network}
  @param net: the network object

    "subnet": net.network,
    "gateway": net.gateway,
    "network6": net.network6,
    "gateway6": net.gateway6,
    "network_type": net.network_type,
    "mac_prefix": net.mac_prefix,

  return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link, network) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @param tags: list of instance tags as strings
  @return: the hook environment for this instance

    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,

    nic_count = len(nics)
    for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
        env["INSTANCE_NIC%d_NETWORK" % idx] = net
          nobj = objects.Network.FromDict(netinfo)
            env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
            env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
            env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
            env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
            env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
          if nobj.network_type:
            env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
            env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link

  env["INSTANCE_NIC_COUNT"] = nic_count

    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode

  env["INSTANCE_DISK_COUNT"] = disk_count

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value
def _NICToTuple(lu, nic):
  """Build a tuple of nic information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
    net_uuid = lu.cfg.LookupNetwork(net)
      nobj = lu.cfg.GetNetwork(net_uuid)
      netinfo = objects.Network.ToDict(nobj)
  return (ip, mac, mode, link, net, netinfo)
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

    hooks_nics.append(_NICToTuple(lu, nic))
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
  @type override: dict
  @param override: dictionary with key/values that will override
  @return: the hook environment dictionary

  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes the set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @param name: OS name passed by the user, to check for validity

  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
    raise errors.OpPrereqError("OS name must include a variant",

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
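
# Illustrative sketch, not part of the original module: how an LU would
# typically use _CheckIAllocatorOrNode from its CheckArguments, assuming
# the opcode has "iallocator" and "node" slots.
def _ExampleCheckIAllocatorOrNode(lu):
  # After this call at most one of lu.op.iallocator / lu.op.node is set;
  # if neither was given, the cluster-wide default iallocator is filled in
  # (or an error is raised when no default exists).
  _CheckIAllocatorOrNode(lu, "iallocator", "node")
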
def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @return: Iallocator name

    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",


def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
      "OP_TARGET": self.cfg.GetClusterName(),

  def BuildHooksNodes(self):
    """Build hooks nodes.

    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
      "OP_TARGET": self.cfg.GetClusterName(),

  def BuildHooksNodes(self):
    """Build hooks nodes.

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
    instancelist = self.cfg.GetInstanceList()
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
      self.LogWarning("Error disabling the master IP address: %s",

    return master_params.name
def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

    fnamemsg = "While verifying %s: %s" % (filename, msg)

    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1934 def _GetAllHypervisorParameters(cluster, instances):
1935 """Compute the set of all hypervisor parameters.
1937 @type cluster: L{objects.Cluster}
1938 @param cluster: the cluster object
1939 @param instances: list of L{objects.Instance}
1940 @param instances: additional instances from which to obtain parameters
1941 @rtype: list of (origin, hypervisor, parameters)
1942 @return: a list with all parameters found, indicating the hypervisor they
1943 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1948 for hv_name in cluster.enabled_hypervisors:
1949 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1951 for os_name, os_hvp in cluster.os_hvp.items():
1952 for hv_name, hv_params in os_hvp.items():
1954 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1955 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1957 # TODO: collapse identical parameter values in a single one
1958 for instance in instances:
1959 if instance.hvparams:
1960 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1961 cluster.FillHV(instance)))
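# Illustrative sketch (assumption, not from the original source): with one
# enabled hypervisor, one OS-level override and one instance, the hvp_data
# list built above would look roughly like
#
#   [("cluster", "xen-pvm", {...cluster-level defaults...}),
#    ("os debian-image", "xen-pvm", {...defaults merged with os_hvp...}),
#    ("instance inst1.example.com", "xen-pvm", {...fully filled params...})]
#
# i.e. one (origin, hypervisor, parameters) tuple per parameter source.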
1966 class _VerifyErrors(object):
1967 """Mix-in for cluster/group verify LUs.
1969 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1970 self.op and self._feedback_fn to be available.)
1974 ETYPE_FIELD = "code"
1975 ETYPE_ERROR = "ERROR"
1976 ETYPE_WARNING = "WARNING"
1978 def _Error(self, ecode, item, msg, *args, **kwargs):
1979 """Format an error message.
1981 Based on the opcode's error_codes parameter, either format a
1982 parseable error code, or a simpler error string.
1984 This must be called only from Exec and functions called from Exec.
1987 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1988 itype, etxt, _ = ecode
1989 # first complete the msg
1992 # then format the whole message
1993 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1994 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2000 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2001 # and finally report it via the feedback_fn
2002 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
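# Illustrative example (hypothetical values) of the two message formats
# produced above: with op.error_codes set, the line sent to feedback_fn is
# machine-parseable; otherwise it is formatted for humans:
#
#   " - ERROR:ENODESSH:node:node2.example.com:ssh communication failed"
#   " - ERROR: node node2.example.com: ssh communication failed"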
2004 def _ErrorIf(self, cond, ecode, *args, **kwargs):
2005 """Log an error message if the passed condition is True.
2009 or self.op.debug_simulate_errors) # pylint: disable=E1101
2011 # If the error code is in the list of ignored errors, demote the error to a warning
2013 (_, etxt, _) = ecode
2014 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2015 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
2018 self._Error(ecode, *args, **kwargs)
2020 # do not mark the operation as failed for WARN cases only
2021 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
2022 self.bad = self.bad or cond
2025 class LUClusterVerify(NoHooksLU):
2026 """Submits all jobs necessary to verify the cluster.
2031 def ExpandNames(self):
2032 self.needed_locks = {}
2034 def Exec(self, feedback_fn):
2037 if self.op.group_name:
2038 groups = [self.op.group_name]
2039 depends_fn = lambda: None
2041 groups = self.cfg.GetNodeGroupList()
2043 # Verify global configuration
2045 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2048 # Always depend on global verification
2049 depends_fn = lambda: [(-len(jobs), [])]
2052 [opcodes.OpClusterVerifyGroup(group_name=group,
2053 ignore_errors=self.op.ignore_errors,
2054 depends=depends_fn())]
2055 for group in groups)
2057 # Fix up all parameters
2058 for op in itertools.chain(*jobs): # pylint: disable=W0142
2059 op.debug_simulate_errors = self.op.debug_simulate_errors
2060 op.verbose = self.op.verbose
2061 op.error_codes = self.op.error_codes
2063 op.skip_checks = self.op.skip_checks
2064 except AttributeError:
2065 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2067 return ResultWithJobs(jobs)
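# Illustrative sketch (assumption): for a cluster with two node groups and no
# group_name restriction, the job list submitted above is roughly
#
#   jobs = [
#     [OpClusterVerifyConfig(...)],                  # job 0: global config check
#     [OpClusterVerifyGroup(group_name="group1", depends=[(-1, [])], ...)],
#     [OpClusterVerifyGroup(group_name="group2", depends=[(-1, [])], ...)],
#   ]
#
# The relative dependency (-len(jobs), []) makes every per-group job wait for
# the configuration check submitted in the same batch.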
2070 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2071 """Verifies the cluster config.
2076 def _VerifyHVP(self, hvp_data):
2077 """Verifies locally the syntax of the hypervisor parameters.
2080 for item, hv_name, hv_params in hvp_data:
2081 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2084 hv_class = hypervisor.GetHypervisor(hv_name)
2085 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2086 hv_class.CheckParameterSyntax(hv_params)
2087 except errors.GenericError, err:
2088 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2090 def ExpandNames(self):
2091 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2092 self.share_locks = _ShareAll()
2094 def CheckPrereq(self):
2095 """Check prerequisites.
2098 # Retrieve all information
2099 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2100 self.all_node_info = self.cfg.GetAllNodesInfo()
2101 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2103 def Exec(self, feedback_fn):
2104 """Verify integrity of cluster, performing various test on nodes.
2108 self._feedback_fn = feedback_fn
2110 feedback_fn("* Verifying cluster config")
2112 for msg in self.cfg.VerifyConfig():
2113 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2115 feedback_fn("* Verifying cluster certificate files")
2117 for cert_filename in pathutils.ALL_CERT_FILES:
2118 (errcode, msg) = _VerifyCertificate(cert_filename)
2119 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2121 feedback_fn("* Verifying hypervisor parameters")
2123 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2124 self.all_inst_info.values()))
2126 feedback_fn("* Verifying all nodes belong to an existing group")
2128 # We do this verification here because, should this bogus circumstance
2129 # occur, it would never be caught by VerifyGroup, which only acts on
2130 # nodes/instances reachable from existing node groups.
2132 dangling_nodes = set(node.name for node in self.all_node_info.values()
2133 if node.group not in self.all_group_info)
2135 dangling_instances = {}
2136 no_node_instances = []
2138 for inst in self.all_inst_info.values():
2139 if inst.primary_node in dangling_nodes:
2140 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2141 elif inst.primary_node not in self.all_node_info:
2142 no_node_instances.append(inst.name)
2147 utils.CommaJoin(dangling_instances.get(node.name,
2149 for node in dangling_nodes]
2151 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2153 "the following nodes (and their instances) belong to a non"
2154 " existing group: %s", utils.CommaJoin(pretty_dangling))
2156 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2158 "the following instances have a non-existing primary-node:"
2159 " %s", utils.CommaJoin(no_node_instances))
2164 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2165 """Verifies the status of a node group.
2168 HPATH = "cluster-verify"
2169 HTYPE = constants.HTYPE_CLUSTER
2172 _HOOKS_INDENT_RE = re.compile("^", re.M)
2174 class NodeImage(object):
2175 """A class representing the logical and physical status of a node.
2178 @ivar name: the node name to which this object refers
2179 @ivar volumes: a structure as returned from
2180 L{ganeti.backend.GetVolumeList} (runtime)
2181 @ivar instances: a list of running instances (runtime)
2182 @ivar pinst: list of configured primary instances (config)
2183 @ivar sinst: list of configured secondary instances (config)
2184 @ivar sbp: dictionary of {primary-node: list of instances} for all
2185 instances for which this node is secondary (config)
2186 @ivar mfree: free memory, as reported by hypervisor (runtime)
2187 @ivar dfree: free disk, as reported by the node (runtime)
2188 @ivar offline: the offline status (config)
2189 @type rpc_fail: boolean
2190 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2191 not whether the individual keys were correct) (runtime)
2192 @type lvm_fail: boolean
2193 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2194 @type hyp_fail: boolean
2195 @ivar hyp_fail: whether the RPC call didn't return the instance list
2196 @type ghost: boolean
2197 @ivar ghost: whether this is a known node or not (config)
2198 @type os_fail: boolean
2199 @ivar os_fail: whether the RPC call didn't return valid OS data
2201 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2202 @type vm_capable: boolean
2203 @ivar vm_capable: whether the node can host instances
2206 def __init__(self, offline=False, name=None, vm_capable=True):
2215 self.offline = offline
2216 self.vm_capable = vm_capable
2217 self.rpc_fail = False
2218 self.lvm_fail = False
2219 self.hyp_fail = False
2221 self.os_fail = False
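# Illustrative example (hypothetical values) of a NodeImage after the runtime
# data has been gathered in Exec():
#
#   nimg.name = "node1.example.com"
#   nimg.volumes = {...}                     # as returned by backend.GetVolumeList
#   nimg.instances = ["inst1.example.com"]   # instances reported as running
#   nimg.pinst = ["inst1.example.com"]       # configured primary instances
#   nimg.sinst = []                          # configured secondary instances
#   nimg.mfree = 2048                        # MiB free, per the hypervisor
#   nimg.rpc_fail = False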
2224 def ExpandNames(self):
2225 # This raises errors.OpPrereqError on its own:
2226 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2228 # Get instances in node group; this is unsafe and needs verification later
2230 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2232 self.needed_locks = {
2233 locking.LEVEL_INSTANCE: inst_names,
2234 locking.LEVEL_NODEGROUP: [self.group_uuid],
2235 locking.LEVEL_NODE: [],
2238 self.share_locks = _ShareAll()
2240 def DeclareLocks(self, level):
2241 if level == locking.LEVEL_NODE:
2242 # Get members of node group; this is unsafe and needs verification later
2243 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2245 all_inst_info = self.cfg.GetAllInstancesInfo()
2247 # In Exec(), we warn about mirrored instances that have primary and
2248 # secondary living in separate node groups. To fully verify that
2249 # volumes for these instances are healthy, we will need to do an
2250 # extra call to their secondaries. We ensure here those nodes will be locked.
2252 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2253 # Important: access only the instances whose lock is owned
2254 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2255 nodes.update(all_inst_info[inst].secondary_nodes)
2257 self.needed_locks[locking.LEVEL_NODE] = nodes
2259 def CheckPrereq(self):
2260 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2261 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2263 group_nodes = set(self.group_info.members)
2265 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2268 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2270 unlocked_instances = \
2271 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2274 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2275 utils.CommaJoin(unlocked_nodes),
2278 if unlocked_instances:
2279 raise errors.OpPrereqError("Missing lock for instances: %s" %
2280 utils.CommaJoin(unlocked_instances),
2283 self.all_node_info = self.cfg.GetAllNodesInfo()
2284 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2286 self.my_node_names = utils.NiceSort(group_nodes)
2287 self.my_inst_names = utils.NiceSort(group_instances)
2289 self.my_node_info = dict((name, self.all_node_info[name])
2290 for name in self.my_node_names)
2292 self.my_inst_info = dict((name, self.all_inst_info[name])
2293 for name in self.my_inst_names)
2295 # We detect here the nodes that will need the extra RPC calls for verifying
2296 # split LV volumes; they should be locked.
2297 extra_lv_nodes = set()
2299 for inst in self.my_inst_info.values():
2300 if inst.disk_template in constants.DTS_INT_MIRROR:
2301 for nname in inst.all_nodes:
2302 if self.all_node_info[nname].group != self.group_uuid:
2303 extra_lv_nodes.add(nname)
2305 unlocked_lv_nodes = \
2306 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2308 if unlocked_lv_nodes:
2309 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2310 utils.CommaJoin(unlocked_lv_nodes),
2312 self.extra_lv_nodes = list(extra_lv_nodes)
2314 def _VerifyNode(self, ninfo, nresult):
2315 """Perform some basic validation on data returned from a node.
2317 - check the result data structure is well formed and has all the required fields
2319 - check ganeti version
2321 @type ninfo: L{objects.Node}
2322 @param ninfo: the node to check
2323 @param nresult: the results from the node
2325 @return: whether overall this call was successful (and we can expect
2326 reasonable values in the response)
2330 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2332 # main result, nresult should be a non-empty dict
2333 test = not nresult or not isinstance(nresult, dict)
2334 _ErrorIf(test, constants.CV_ENODERPC, node,
2335 "unable to verify node: no data returned")
2339 # compares ganeti version
2340 local_version = constants.PROTOCOL_VERSION
2341 remote_version = nresult.get("version", None)
2342 test = not (remote_version and
2343 isinstance(remote_version, (list, tuple)) and
2344 len(remote_version) == 2)
2345 _ErrorIf(test, constants.CV_ENODERPC, node,
2346 "connection to node returned invalid data")
2350 test = local_version != remote_version[0]
2351 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2352 "incompatible protocol versions: master %s,"
2353 " node %s", local_version, remote_version[0])
2357 # node seems compatible, we can actually try to look into its results
2359 # full package version
2360 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2361 constants.CV_ENODEVERSION, node,
2362 "software version mismatch: master %s, node %s",
2363 constants.RELEASE_VERSION, remote_version[1],
2364 code=self.ETYPE_WARNING)
2366 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2367 if ninfo.vm_capable and isinstance(hyp_result, dict):
2368 for hv_name, hv_result in hyp_result.iteritems():
2369 test = hv_result is not None
2370 _ErrorIf(test, constants.CV_ENODEHV, node,
2371 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2373 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2374 if ninfo.vm_capable and isinstance(hvp_result, list):
2375 for item, hv_name, hv_result in hvp_result:
2376 _ErrorIf(True, constants.CV_ENODEHV, node,
2377 "hypervisor %s parameter verify failure (source %s): %s",
2378 hv_name, item, hv_result)
2380 test = nresult.get(constants.NV_NODESETUP,
2381 ["Missing NODESETUP results"])
2382 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2387 def _VerifyNodeTime(self, ninfo, nresult,
2388 nvinfo_starttime, nvinfo_endtime):
2389 """Check the node time.
2391 @type ninfo: L{objects.Node}
2392 @param ninfo: the node to check
2393 @param nresult: the remote results for the node
2394 @param nvinfo_starttime: the start time of the RPC call
2395 @param nvinfo_endtime: the end time of the RPC call
2399 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2401 ntime = nresult.get(constants.NV_TIME, None)
2403 ntime_merged = utils.MergeTime(ntime)
2404 except (ValueError, TypeError):
2405 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2408 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2409 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2410 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2411 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2415 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2416 "Node time diverges by at least %s from master node time",
2419 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2420 """Check the node LVM results.
2422 @type ninfo: L{objects.Node}
2423 @param ninfo: the node to check
2424 @param nresult: the remote results for the node
2425 @param vg_name: the configured VG name
2432 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2434 # checks vg existence and size > 20G
2435 vglist = nresult.get(constants.NV_VGLIST, None)
2437 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2439 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2440 constants.MIN_VG_SIZE)
2441 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2444 pvlist = nresult.get(constants.NV_PVLIST, None)
2445 test = pvlist is None
2446 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2448 # check that ':' is not present in PV names, since it's a
2449 # special character for lvcreate (denotes the range of PEs to allocate on)
2451 for _, pvname, owner_vg in pvlist:
2452 test = ":" in pvname
2453 _ErrorIf(test, constants.CV_ENODELVM, node,
2454 "Invalid character ':' in PV '%s' of VG '%s'",
2457 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2458 """Check the node bridges.
2460 @type ninfo: L{objects.Node}
2461 @param ninfo: the node to check
2462 @param nresult: the remote results for the node
2463 @param bridges: the expected list of bridges
2470 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2472 missing = nresult.get(constants.NV_BRIDGES, None)
2473 test = not isinstance(missing, list)
2474 _ErrorIf(test, constants.CV_ENODENET, node,
2475 "did not return valid bridge information")
2477 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2478 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2480 def _VerifyNodeUserScripts(self, ninfo, nresult):
2481 """Check the results of user scripts presence and executability on the node
2483 @type ninfo: L{objects.Node}
2484 @param ninfo: the node to check
2485 @param nresult: the remote results for the node
2490 test = constants.NV_USERSCRIPTS not in nresult
2491 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2492 "did not return user scripts information")
2494 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2496 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2497 "user scripts not present or not executable: %s" %
2498 utils.CommaJoin(sorted(broken_scripts)))
2500 def _VerifyNodeNetwork(self, ninfo, nresult):
2501 """Check the node network connectivity results.
2503 @type ninfo: L{objects.Node}
2504 @param ninfo: the node to check
2505 @param nresult: the remote results for the node
2509 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2511 test = constants.NV_NODELIST not in nresult
2512 _ErrorIf(test, constants.CV_ENODESSH, node,
2513 "node hasn't returned node ssh connectivity data")
2515 if nresult[constants.NV_NODELIST]:
2516 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2517 _ErrorIf(True, constants.CV_ENODESSH, node,
2518 "ssh communication with node '%s': %s", a_node, a_msg)
2520 test = constants.NV_NODENETTEST not in nresult
2521 _ErrorIf(test, constants.CV_ENODENET, node,
2522 "node hasn't returned node tcp connectivity data")
2524 if nresult[constants.NV_NODENETTEST]:
2525 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2527 _ErrorIf(True, constants.CV_ENODENET, node,
2528 "tcp communication with node '%s': %s",
2529 anode, nresult[constants.NV_NODENETTEST][anode])
2531 test = constants.NV_MASTERIP not in nresult
2532 _ErrorIf(test, constants.CV_ENODENET, node,
2533 "node hasn't returned node master IP reachability data")
2535 if not nresult[constants.NV_MASTERIP]:
2536 if node == self.master_node:
2537 msg = "the master node cannot reach the master IP (not configured?)"
2539 msg = "cannot reach the master IP"
2540 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2542 def _VerifyInstance(self, instance, instanceconfig, node_image,
2544 """Verify an instance.
2546 This function checks to see if the required block devices are
2547 available on the instance's node.
2550 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2551 node_current = instanceconfig.primary_node
2553 node_vol_should = {}
2554 instanceconfig.MapLVsByNode(node_vol_should)
2556 cluster = self.cfg.GetClusterInfo()
2557 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2559 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2560 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2561 code=self.ETYPE_WARNING)
2563 for node in node_vol_should:
2564 n_img = node_image[node]
2565 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2566 # ignore missing volumes on offline or broken nodes
2568 for volume in node_vol_should[node]:
2569 test = volume not in n_img.volumes
2570 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2571 "volume %s missing on node %s", volume, node)
2573 if instanceconfig.admin_state == constants.ADMINST_UP:
2574 pri_img = node_image[node_current]
2575 test = instance not in pri_img.instances and not pri_img.offline
2576 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2577 "instance not running on its primary node %s",
2580 diskdata = [(nname, success, status, idx)
2581 for (nname, disks) in diskstatus.items()
2582 for idx, (success, status) in enumerate(disks)]
2584 for nname, success, bdev_status, idx in diskdata:
2585 # the 'ghost node' construction in Exec() ensures that we have a
2587 snode = node_image[nname]
2588 bad_snode = snode.ghost or snode.offline
2589 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2590 not success and not bad_snode,
2591 constants.CV_EINSTANCEFAULTYDISK, instance,
2592 "couldn't retrieve status for disk/%s on %s: %s",
2593 idx, nname, bdev_status)
2594 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2595 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2596 constants.CV_EINSTANCEFAULTYDISK, instance,
2597 "disk/%s on %s is faulty", idx, nname)
2599 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2600 """Verify if there are any unknown volumes in the cluster.
2602 The .os, .swap and backup volumes are ignored. All other volumes are
2603 reported as unknown.
2605 @type reserved: L{ganeti.utils.FieldSet}
2606 @param reserved: a FieldSet of reserved volume names
2609 for node, n_img in node_image.items():
2610 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2611 self.all_node_info[node].group != self.group_uuid):
2612 # skip non-healthy nodes
2614 for volume in n_img.volumes:
2615 test = ((node not in node_vol_should or
2616 volume not in node_vol_should[node]) and
2617 not reserved.Matches(volume))
2618 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2619 "volume %s is unknown", volume)
2621 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2622 """Verify N+1 Memory Resilience.
2624 Check that if one single node dies we can still start all the
2625 instances it was primary for.
2628 cluster_info = self.cfg.GetClusterInfo()
2629 for node, n_img in node_image.items():
2630 # This code checks that every node which is now listed as
2631 # secondary has enough memory to host all instances it is
2632 # supposed to, should a single other node in the cluster fail.
2633 # FIXME: not ready for failover to an arbitrary node
2634 # FIXME: does not support file-backed instances
2635 # WARNING: we currently take into account down instances as well
2636 # as up ones, considering that even if they're down someone
2637 # might want to start them even in the event of a node failure.
2638 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2639 # we're skipping nodes marked offline and nodes in other groups from
2640 # the N+1 warning, since most likely we don't have good memory
2641 # information from them; we already list instances living on such
2642 # nodes, and that's enough warning
2644 #TODO(dynmem): also consider ballooning out other instances
2645 for prinode, instances in n_img.sbp.items():
2647 for instance in instances:
2648 bep = cluster_info.FillBE(instance_cfg[instance])
2649 if bep[constants.BE_AUTO_BALANCE]:
2650 needed_mem += bep[constants.BE_MINMEM]
2651 test = n_img.mfree < needed_mem
2652 self._ErrorIf(test, constants.CV_ENODEN1, node,
2653 "not enough memory to accomodate instance failovers"
2654 " should node %s fail (%dMiB needed, %dMiB available)",
2655 prinode, needed_mem, n_img.mfree)
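# Worked example (hypothetical numbers) of the N+1 check above: if this node
# is secondary for two instances of primary node P that have auto-balance
# enabled and minimum memory of 1024 and 2048 MiB respectively, then
#
#   needed_mem = 1024 + 2048 = 3072
#
# and an error is raised if the node's reported free memory (mfree) is below
# 3072 MiB, i.e. it could not host P's instances should P fail.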
2658 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2659 (files_all, files_opt, files_mc, files_vm)):
2660 """Verifies file checksums collected from all nodes.
2662 @param errorif: Callback for reporting errors
2663 @param nodeinfo: List of L{objects.Node} objects
2664 @param master_node: Name of master node
2665 @param all_nvinfo: RPC results
2668 # Define functions determining which nodes to consider for a file
2671 (files_mc, lambda node: (node.master_candidate or
2672 node.name == master_node)),
2673 (files_vm, lambda node: node.vm_capable),
2676 # Build mapping from filename to list of nodes which should have the file
2678 for (files, fn) in files2nodefn:
2680 filenodes = nodeinfo
2682 filenodes = filter(fn, nodeinfo)
2683 nodefiles.update((filename,
2684 frozenset(map(operator.attrgetter("name"), filenodes)))
2685 for filename in files)
2687 assert set(nodefiles) == (files_all | files_mc | files_vm)
2689 fileinfo = dict((filename, {}) for filename in nodefiles)
2690 ignore_nodes = set()
2692 for node in nodeinfo:
2694 ignore_nodes.add(node.name)
2697 nresult = all_nvinfo[node.name]
2699 if nresult.fail_msg or not nresult.payload:
2702 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2703 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2704 for (key, value) in fingerprints.items())
2707 test = not (node_files and isinstance(node_files, dict))
2708 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2709 "Node did not return file checksum data")
2711 ignore_nodes.add(node.name)
2714 # Build per-checksum mapping from filename to nodes having it
2715 for (filename, checksum) in node_files.items():
2716 assert filename in nodefiles
2717 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2719 for (filename, checksums) in fileinfo.items():
2720 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2722 # Nodes having the file
2723 with_file = frozenset(node_name
2724 for nodes in fileinfo[filename].values()
2725 for node_name in nodes) - ignore_nodes
2727 expected_nodes = nodefiles[filename] - ignore_nodes
2729 # Nodes missing file
2730 missing_file = expected_nodes - with_file
2732 if filename in files_opt:
2734 errorif(missing_file and missing_file != expected_nodes,
2735 constants.CV_ECLUSTERFILECHECK, None,
2736 "File %s is optional, but it must exist on all or no"
2737 " nodes (not found on %s)",
2738 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2740 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2741 "File %s is missing from node(s) %s", filename,
2742 utils.CommaJoin(utils.NiceSort(missing_file)))
2744 # Warn if a node has a file it shouldn't
2745 unexpected = with_file - expected_nodes
2747 constants.CV_ECLUSTERFILECHECK, None,
2748 "File %s should not exist on node(s) %s",
2749 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2751 # See if there are multiple versions of the file
2752 test = len(checksums) > 1
2754 variants = ["variant %s on %s" %
2755 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2756 for (idx, (checksum, nodes)) in
2757 enumerate(sorted(checksums.items()))]
2761 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2762 "File %s found with %s different checksums (%s)",
2763 filename, len(checksums), "; ".join(variants))
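# Illustrative sketch (assumption) of the structures built above: nodefiles
# maps each checked file to the set of nodes expected to have it, while
# fileinfo maps the file to the checksums actually reported, e.g.
#
#   nodefiles["/var/lib/ganeti/ssconf_cluster_name"] =
#       frozenset(["node1", "node2", "node3"])
#   fileinfo["/var/lib/ganeti/ssconf_cluster_name"] =
#       {"3b1b...": set(["node1", "node2"]), "9f04...": set(["node3"])}
#
# Two different checksums for the same file trigger the "different checksums"
# error reported just above.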
2765 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2767 """Verifies and the node DRBD status.
2769 @type ninfo: L{objects.Node}
2770 @param ninfo: the node to check
2771 @param nresult: the remote results for the node
2772 @param instanceinfo: the dict of instances
2773 @param drbd_helper: the configured DRBD usermode helper
2774 @param drbd_map: the DRBD map as returned by
2775 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2779 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2782 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2783 test = (helper_result is None)
2784 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2785 "no drbd usermode helper returned")
2787 status, payload = helper_result
2789 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2790 "drbd usermode helper check unsuccessful: %s", payload)
2791 test = status and (payload != drbd_helper)
2792 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2793 "wrong drbd usermode helper: %s", payload)
2795 # compute the DRBD minors
2797 for minor, instance in drbd_map[node].items():
2798 test = instance not in instanceinfo
2799 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2800 "ghost instance '%s' in temporary DRBD map", instance)
2801 # ghost instance should not be running, but otherwise we
2802 # don't give double warnings (both ghost instance and
2803 # unallocated minor in use)
2805 node_drbd[minor] = (instance, False)
2807 instance = instanceinfo[instance]
2808 node_drbd[minor] = (instance.name,
2809 instance.admin_state == constants.ADMINST_UP)
2811 # and now check them
2812 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2813 test = not isinstance(used_minors, (tuple, list))
2814 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2815 "cannot parse drbd status file: %s", str(used_minors))
2817 # we cannot check drbd status
2820 for minor, (iname, must_exist) in node_drbd.items():
2821 test = minor not in used_minors and must_exist
2822 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2823 "drbd minor %d of instance %s is not active", minor, iname)
2824 for minor in used_minors:
2825 test = minor not in node_drbd
2826 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2827 "unallocated drbd minor %d is in use", minor)
2829 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2830 """Builds the node OS structures.
2832 @type ninfo: L{objects.Node}
2833 @param ninfo: the node to check
2834 @param nresult: the remote results for the node
2835 @param nimg: the node image object
2839 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2841 remote_os = nresult.get(constants.NV_OSLIST, None)
2842 test = (not isinstance(remote_os, list) or
2843 not compat.all(isinstance(v, list) and len(v) == 7
2844 for v in remote_os))
2846 _ErrorIf(test, constants.CV_ENODEOS, node,
2847 "node hasn't returned valid OS data")
2856 for (name, os_path, status, diagnose,
2857 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2859 if name not in os_dict:
2862 # parameters is a list of lists instead of list of tuples due to
2863 # JSON lacking a real tuple type, fix it:
2864 parameters = [tuple(v) for v in parameters]
2865 os_dict[name].append((os_path, status, diagnose,
2866 set(variants), set(parameters), set(api_ver)))
2868 nimg.oslist = os_dict
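# Illustrative shape (assumption, hypothetical values) of the structure
# stored above:
#
#   nimg.oslist = {
#     "debian-image": [("/srv/ganeti/os/debian-image",  # path
#                       True,                           # status
#                       "",                             # diagnose message
#                       set(["default"]),               # variants
#                       set([("dhcp", "...")]),         # parameters
#                       set([20]))],                    # API versions
#   }
#
# A name mapping to more than one entry means duplicate OS definitions and is
# reported by _VerifyNodeOS.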
2870 def _VerifyNodeOS(self, ninfo, nimg, base):
2871 """Verifies the node OS list.
2873 @type ninfo: L{objects.Node}
2874 @param ninfo: the node to check
2875 @param nimg: the node image object
2876 @param base: the 'template' node we match against (e.g. from the master)
2880 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2882 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2884 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2885 for os_name, os_data in nimg.oslist.items():
2886 assert os_data, "Empty OS status for OS %s?!" % os_name
2887 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2888 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2889 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2890 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2891 "OS '%s' has multiple entries (first one shadows the rest): %s",
2892 os_name, utils.CommaJoin([v[0] for v in os_data]))
2893 # comparisons with the 'base' image
2894 test = os_name not in base.oslist
2895 _ErrorIf(test, constants.CV_ENODEOS, node,
2896 "Extra OS %s not present on reference node (%s)",
2900 assert base.oslist[os_name], "Base node has empty OS status?"
2901 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2903 # base OS is invalid, skipping
2905 for kind, a, b in [("API version", f_api, b_api),
2906 ("variants list", f_var, b_var),
2907 ("parameters", beautify_params(f_param),
2908 beautify_params(b_param))]:
2909 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2910 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2911 kind, os_name, base.name,
2912 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2914 # check any missing OSes
2915 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2916 _ErrorIf(missing, constants.CV_ENODEOS, node,
2917 "OSes present on reference node %s but missing on this node: %s",
2918 base.name, utils.CommaJoin(missing))
2920 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2921 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2923 @type ninfo: L{objects.Node}
2924 @param ninfo: the node to check
2925 @param nresult: the remote results for the node
2926 @type is_master: bool
2927 @param is_master: Whether node is the master node
2933 (constants.ENABLE_FILE_STORAGE or
2934 constants.ENABLE_SHARED_FILE_STORAGE)):
2936 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2938 # This should never happen
2939 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2940 "Node did not return forbidden file storage paths")
2942 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2943 "Found forbidden file storage paths: %s",
2944 utils.CommaJoin(fspaths))
2946 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2947 constants.CV_ENODEFILESTORAGEPATHS, node,
2948 "Node should not have returned forbidden file storage"
2951 def _VerifyOob(self, ninfo, nresult):
2952 """Verifies out of band functionality of a node.
2954 @type ninfo: L{objects.Node}
2955 @param ninfo: the node to check
2956 @param nresult: the remote results for the node
2960 # We just have to verify the paths on master and/or master candidates
2961 # as the oob helper is invoked on the master
2962 if ((ninfo.master_candidate or ninfo.master_capable) and
2963 constants.NV_OOB_PATHS in nresult):
2964 for path_result in nresult[constants.NV_OOB_PATHS]:
2965 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2967 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2968 """Verifies and updates the node volume data.
2970 This function will update a L{NodeImage}'s internal structures
2971 with data from the remote call.
2973 @type ninfo: L{objects.Node}
2974 @param ninfo: the node to check
2975 @param nresult: the remote results for the node
2976 @param nimg: the node image object
2977 @param vg_name: the configured VG name
2981 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2983 nimg.lvm_fail = True
2984 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2987 elif isinstance(lvdata, basestring):
2988 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2989 utils.SafeEncode(lvdata))
2990 elif not isinstance(lvdata, dict):
2991 _ErrorIf(True, constants.CV_ENODELVM, node,
2992 "rpc call to node failed (lvlist)")
2994 nimg.volumes = lvdata
2995 nimg.lvm_fail = False
2997 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2998 """Verifies and updates the node instance list.
3000 If the listing was successful, then updates this node's instance
3001 list. Otherwise, it marks the RPC call as failed for the instance
3004 @type ninfo: L{objects.Node}
3005 @param ninfo: the node to check
3006 @param nresult: the remote results for the node
3007 @param nimg: the node image object
3010 idata = nresult.get(constants.NV_INSTANCELIST, None)
3011 test = not isinstance(idata, list)
3012 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3013 "rpc call to node failed (instancelist): %s",
3014 utils.SafeEncode(str(idata)))
3016 nimg.hyp_fail = True
3018 nimg.instances = idata
3020 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3021 """Verifies and computes a node information map
3023 @type ninfo: L{objects.Node}
3024 @param ninfo: the node to check
3025 @param nresult: the remote results for the node
3026 @param nimg: the node image object
3027 @param vg_name: the configured VG name
3031 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3033 # try to read free memory (from the hypervisor)
3034 hv_info = nresult.get(constants.NV_HVINFO, None)
3035 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3036 _ErrorIf(test, constants.CV_ENODEHV, node,
3037 "rpc call to node failed (hvinfo)")
3040 nimg.mfree = int(hv_info["memory_free"])
3041 except (ValueError, TypeError):
3042 _ErrorIf(True, constants.CV_ENODERPC, node,
3043 "node returned invalid nodeinfo, check hypervisor")
3045 # FIXME: devise a free space model for file based instances as well
3046 if vg_name is not None:
3047 test = (constants.NV_VGLIST not in nresult or
3048 vg_name not in nresult[constants.NV_VGLIST])
3049 _ErrorIf(test, constants.CV_ENODELVM, node,
3050 "node didn't return data for the volume group '%s'"
3051 " - it is either missing or broken", vg_name)
3054 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3055 except (ValueError, TypeError):
3056 _ErrorIf(True, constants.CV_ENODERPC, node,
3057 "node returned invalid LVM info, check LVM status")
3059 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3060 """Gets per-disk status information for all instances.
3062 @type nodelist: list of strings
3063 @param nodelist: Node names
3064 @type node_image: dict of (name, L{objects.Node})
3065 @param node_image: Node objects
3066 @type instanceinfo: dict of (name, L{objects.Instance})
3067 @param instanceinfo: Instance objects
3068 @rtype: {instance: {node: [(success, payload)]}}
3069 @return: a dictionary of per-instance dictionaries with nodes as
3070 keys and disk information as values; the disk information is a
3071 list of tuples (success, payload)
3074 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3077 node_disks_devonly = {}
3078 diskless_instances = set()
3079 diskless = constants.DT_DISKLESS
3081 for nname in nodelist:
3082 node_instances = list(itertools.chain(node_image[nname].pinst,
3083 node_image[nname].sinst))
3084 diskless_instances.update(inst for inst in node_instances
3085 if instanceinfo[inst].disk_template == diskless)
3086 disks = [(inst, disk)
3087 for inst in node_instances
3088 for disk in instanceinfo[inst].disks]
3091 # No need to collect data
3094 node_disks[nname] = disks
3096 # _AnnotateDiskParams already makes copies of the disks
3098 for (inst, dev) in disks:
3099 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3100 self.cfg.SetDiskID(anno_disk, nname)
3101 devonly.append(anno_disk)
3103 node_disks_devonly[nname] = devonly
3105 assert len(node_disks) == len(node_disks_devonly)
3107 # Collect data from all nodes with disks
3108 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3111 assert len(result) == len(node_disks)
3115 for (nname, nres) in result.items():
3116 disks = node_disks[nname]
3119 # No data from this node
3120 data = len(disks) * [(False, "node offline")]
3123 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3124 "while getting disk information: %s", msg)
3126 # No data from this node
3127 data = len(disks) * [(False, msg)]
3130 for idx, i in enumerate(nres.payload):
3131 if isinstance(i, (tuple, list)) and len(i) == 2:
3134 logging.warning("Invalid result from node %s, entry %d: %s",
3136 data.append((False, "Invalid result from the remote node"))
3138 for ((inst, _), status) in zip(disks, data):
3139 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3141 # Add empty entries for diskless instances.
3142 for inst in diskless_instances:
3143 assert inst not in instdisk
3146 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3147 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3148 compat.all(isinstance(s, (tuple, list)) and
3149 len(s) == 2 for s in statuses)
3150 for inst, nnames in instdisk.items()
3151 for nname, statuses in nnames.items())
3152 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
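# Illustrative shape (hypothetical values) of the mapping returned above:
#
#   instdisk = {
#     "inst1.example.com": {
#       "node1.example.com": [(True, <block device status>),
#                             (False, "node offline")],
#     },
#   }
#
# i.e. per instance and per node, one (success, payload) pair per disk, with
# diskless instances present but mapped to an empty dict.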
3157 def _SshNodeSelector(group_uuid, all_nodes):
3158 """Create endless iterators for all potential SSH check hosts.
3161 nodes = [node for node in all_nodes
3162 if (node.group != group_uuid and
3164 keyfunc = operator.attrgetter("group")
3166 return map(itertools.cycle,
3167 [sorted(map(operator.attrgetter("name"), names))
3168 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3172 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3173 """Choose which nodes should talk to which other nodes.
3175 We will make nodes contact all nodes in their group, and one node from every other node group.
3178 @warning: This algorithm has a known issue if one node group is much
3179 smaller than others (e.g. just one node). In such a case all other
3180 nodes will talk to the single node.
3183 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3184 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3186 return (online_nodes,
3187 dict((name, sorted([i.next() for i in sel]))
3188 for name in online_nodes))
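# Illustrative example (hypothetical groups/nodes): for a three-group cluster
# being verified one group at a time, the mapping returned above could be
#
#   (["node1", "node2"],                    # online nodes of this group
#    {"node1": ["node3", "node5"],          # one target per *other* group
#     "node2": ["node4", "node6"]})
#
# The itertools.cycle iterators built in _SshNodeSelector spread the SSH
# checks over the other groups' nodes in a round-robin fashion.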
3190 def BuildHooksEnv(self):
3193 Cluster-Verify hooks run only in the post phase; if they fail, their
3194 output is logged in the verify output and the verification fails.
3198 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3201 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3202 for node in self.my_node_info.values())
3206 def BuildHooksNodes(self):
3207 """Build hooks nodes.
3210 return ([], self.my_node_names)
3212 def Exec(self, feedback_fn):
3213 """Verify integrity of the node group, performing various test on nodes.
3216 # This method has too many local variables. pylint: disable=R0914
3217 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3219 if not self.my_node_names:
3221 feedback_fn("* Empty node group, skipping verification")
3225 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3226 verbose = self.op.verbose
3227 self._feedback_fn = feedback_fn
3229 vg_name = self.cfg.GetVGName()
3230 drbd_helper = self.cfg.GetDRBDHelper()
3231 cluster = self.cfg.GetClusterInfo()
3232 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3233 hypervisors = cluster.enabled_hypervisors
3234 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3236 i_non_redundant = [] # Non redundant instances
3237 i_non_a_balanced = [] # Non auto-balanced instances
3238 i_offline = 0 # Count of offline instances
3239 n_offline = 0 # Count of offline nodes
3240 n_drained = 0 # Count of nodes being drained
3241 node_vol_should = {}
3243 # FIXME: verify OS list
3246 filemap = _ComputeAncillaryFiles(cluster, False)
3248 # do local checksums
3249 master_node = self.master_node = self.cfg.GetMasterNode()
3250 master_ip = self.cfg.GetMasterIP()
3252 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3255 if self.cfg.GetUseExternalMipScript():
3256 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3258 node_verify_param = {
3259 constants.NV_FILELIST:
3260 map(vcluster.MakeVirtualPath,
3261 utils.UniqueSequence(filename
3262 for files in filemap
3263 for filename in files)),
3264 constants.NV_NODELIST:
3265 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3266 self.all_node_info.values()),
3267 constants.NV_HYPERVISOR: hypervisors,
3268 constants.NV_HVPARAMS:
3269 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3270 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3271 for node in node_data_list
3272 if not node.offline],
3273 constants.NV_INSTANCELIST: hypervisors,
3274 constants.NV_VERSION: None,
3275 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3276 constants.NV_NODESETUP: None,
3277 constants.NV_TIME: None,
3278 constants.NV_MASTERIP: (master_node, master_ip),
3279 constants.NV_OSLIST: None,
3280 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3281 constants.NV_USERSCRIPTS: user_scripts,
3284 if vg_name is not None:
3285 node_verify_param[constants.NV_VGLIST] = None
3286 node_verify_param[constants.NV_LVLIST] = vg_name
3287 node_verify_param[constants.NV_PVLIST] = [vg_name]
3290 node_verify_param[constants.NV_DRBDLIST] = None
3291 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3293 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3294 # Load file storage paths only from master node
3295 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3298 # FIXME: this needs to be changed per node-group, not cluster-wide
3300 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3301 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3302 bridges.add(default_nicpp[constants.NIC_LINK])
3303 for instance in self.my_inst_info.values():
3304 for nic in instance.nics:
3305 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3306 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3307 bridges.add(full_nic[constants.NIC_LINK])
3310 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3312 # Build our expected cluster state
3313 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3315 vm_capable=node.vm_capable))
3316 for node in node_data_list)
3320 for node in self.all_node_info.values():
3321 path = _SupportsOob(self.cfg, node)
3322 if path and path not in oob_paths:
3323 oob_paths.append(path)
3326 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3328 for instance in self.my_inst_names:
3329 inst_config = self.my_inst_info[instance]
3330 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3333 for nname in inst_config.all_nodes:
3334 if nname not in node_image:
3335 gnode = self.NodeImage(name=nname)
3336 gnode.ghost = (nname not in self.all_node_info)
3337 node_image[nname] = gnode
3339 inst_config.MapLVsByNode(node_vol_should)
3341 pnode = inst_config.primary_node
3342 node_image[pnode].pinst.append(instance)
3344 for snode in inst_config.secondary_nodes:
3345 nimg = node_image[snode]
3346 nimg.sinst.append(instance)
3347 if pnode not in nimg.sbp:
3348 nimg.sbp[pnode] = []
3349 nimg.sbp[pnode].append(instance)
3351 # At this point, we have the in-memory data structures complete,
3352 # except for the runtime information, which we'll gather next
3354 # Due to the way our RPC system works, exact response times cannot be
3355 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3356 # time before and after executing the request, we can at least have a time
3358 nvinfo_starttime = time.time()
3359 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3361 self.cfg.GetClusterName())
3362 nvinfo_endtime = time.time()
3364 if self.extra_lv_nodes and vg_name is not None:
3366 self.rpc.call_node_verify(self.extra_lv_nodes,
3367 {constants.NV_LVLIST: vg_name},
3368 self.cfg.GetClusterName())
3370 extra_lv_nvinfo = {}
3372 all_drbd_map = self.cfg.ComputeDRBDMap()
3374 feedback_fn("* Gathering disk information (%s nodes)" %
3375 len(self.my_node_names))
3376 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3379 feedback_fn("* Verifying configuration file consistency")
3381 # If not all nodes are being checked, we need to make sure the master node
3382 # and a non-checked vm_capable node are in the list.
3383 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3385 vf_nvinfo = all_nvinfo.copy()
3386 vf_node_info = list(self.my_node_info.values())
3387 additional_nodes = []
3388 if master_node not in self.my_node_info:
3389 additional_nodes.append(master_node)
3390 vf_node_info.append(self.all_node_info[master_node])
3391 # Add the first vm_capable node we find which is not included,
3392 # excluding the master node (which we already have)
3393 for node in absent_nodes:
3394 nodeinfo = self.all_node_info[node]
3395 if (nodeinfo.vm_capable and not nodeinfo.offline and
3396 node != master_node):
3397 additional_nodes.append(node)
3398 vf_node_info.append(self.all_node_info[node])
3400 key = constants.NV_FILELIST
3401 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3402 {key: node_verify_param[key]},
3403 self.cfg.GetClusterName()))
3405 vf_nvinfo = all_nvinfo
3406 vf_node_info = self.my_node_info.values()
3408 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3410 feedback_fn("* Verifying node status")
3414 for node_i in node_data_list:
3416 nimg = node_image[node]
3420 feedback_fn("* Skipping offline node %s" % (node,))
3424 if node == master_node:
3426 elif node_i.master_candidate:
3427 ntype = "master candidate"
3428 elif node_i.drained:
3434 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3436 msg = all_nvinfo[node].fail_msg
3437 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3440 nimg.rpc_fail = True
3443 nresult = all_nvinfo[node].payload
3445 nimg.call_ok = self._VerifyNode(node_i, nresult)
3446 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3447 self._VerifyNodeNetwork(node_i, nresult)
3448 self._VerifyNodeUserScripts(node_i, nresult)
3449 self._VerifyOob(node_i, nresult)
3450 self._VerifyFileStoragePaths(node_i, nresult,
3451 node == master_node)
3454 self._VerifyNodeLVM(node_i, nresult, vg_name)
3455 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3458 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3459 self._UpdateNodeInstances(node_i, nresult, nimg)
3460 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3461 self._UpdateNodeOS(node_i, nresult, nimg)
3463 if not nimg.os_fail:
3464 if refos_img is None:
3466 self._VerifyNodeOS(node_i, nimg, refos_img)
3467 self._VerifyNodeBridges(node_i, nresult, bridges)
3469 # Check whether all running instances are primary for the node. (This
3470 # can no longer be done from _VerifyInstance below, since some of the
3471 # wrong instances could be from other node groups.)
3472 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3474 for inst in non_primary_inst:
3475 test = inst in self.all_inst_info
3476 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3477 "instance should not run on node %s", node_i.name)
3478 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3479 "node is running unknown instance %s", inst)
3481 for node, result in extra_lv_nvinfo.items():
3482 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3483 node_image[node], vg_name)
3485 feedback_fn("* Verifying instance status")
3486 for instance in self.my_inst_names:
3488 feedback_fn("* Verifying instance %s" % instance)
3489 inst_config = self.my_inst_info[instance]
3490 self._VerifyInstance(instance, inst_config, node_image,
3492 inst_nodes_offline = []
3494 pnode = inst_config.primary_node
3495 pnode_img = node_image[pnode]
3496 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3497 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3498 " primary node failed", instance)
3500 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3502 constants.CV_EINSTANCEBADNODE, instance,
3503 "instance is marked as running and lives on offline node %s",
3504 inst_config.primary_node)
3506 # If the instance is non-redundant we cannot survive losing its primary
3507 # node, so we are not N+1 compliant.
3508 if inst_config.disk_template not in constants.DTS_MIRRORED:
3509 i_non_redundant.append(instance)
3511 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3512 constants.CV_EINSTANCELAYOUT,
3513 instance, "instance has multiple secondary nodes: %s",
3514 utils.CommaJoin(inst_config.secondary_nodes),
3515 code=self.ETYPE_WARNING)
3517 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3518 pnode = inst_config.primary_node
3519 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3520 instance_groups = {}
3522 for node in instance_nodes:
3523 instance_groups.setdefault(self.all_node_info[node].group,
3527 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3528 # Sort so that we always list the primary node first.
3529 for group, nodes in sorted(instance_groups.items(),
3530 key=lambda (_, nodes): pnode in nodes,
3533 self._ErrorIf(len(instance_groups) > 1,
3534 constants.CV_EINSTANCESPLITGROUPS,
3535 instance, "instance has primary and secondary nodes in"
3536 " different groups: %s", utils.CommaJoin(pretty_list),
3537 code=self.ETYPE_WARNING)
3539 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3540 i_non_a_balanced.append(instance)
3542 for snode in inst_config.secondary_nodes:
3543 s_img = node_image[snode]
3544 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3545 snode, "instance %s, connection to secondary node failed",
3549 inst_nodes_offline.append(snode)
3551 # warn that the instance lives on offline nodes
3552 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3553 "instance has offline secondary node(s) %s",
3554 utils.CommaJoin(inst_nodes_offline))
3555 # ... or ghost/non-vm_capable nodes
3556 for node in inst_config.all_nodes:
3557 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3558 instance, "instance lives on ghost node %s", node)
3559 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3560 instance, "instance lives on non-vm_capable node %s", node)
3562 feedback_fn("* Verifying orphan volumes")
3563 reserved = utils.FieldSet(*cluster.reserved_lvs)
3565 # We will get spurious "unknown volume" warnings if any node of this group
3566 # is secondary for an instance whose primary is in another group. To avoid
3567 # them, we find these instances and add their volumes to node_vol_should.
3568 for inst in self.all_inst_info.values():
3569 for secondary in inst.secondary_nodes:
3570 if (secondary in self.my_node_info
3571 and inst.name not in self.my_inst_info):
3572 inst.MapLVsByNode(node_vol_should)
3575 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3577 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3578 feedback_fn("* Verifying N+1 Memory redundancy")
3579 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3581 feedback_fn("* Other Notes")
3583 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3584 % len(i_non_redundant))
3586 if i_non_a_balanced:
3587 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3588 % len(i_non_a_balanced))
3591 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3594 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3597 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3601 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3602 """Analyze the post-hooks' result
3604 This method analyses the hook result, handles it, and sends some
3605 nicely-formatted feedback back to the user.
3607 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3608 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3609 @param hooks_results: the results of the multi-node hooks rpc call
3610 @param feedback_fn: function used to send feedback back to the caller
3611 @param lu_result: previous Exec result
3612 @return: the new Exec result, based on the previous result
3616 # We only really run POST phase hooks, only for non-empty groups,
3617 # and are only interested in their results
3618 if not self.my_node_names:
3621 elif phase == constants.HOOKS_PHASE_POST:
3622 # Used to change hooks' output to proper indentation
3623 feedback_fn("* Hooks Results")
3624 assert hooks_results, "invalid result from hooks"
3626 for node_name in hooks_results:
3627 res = hooks_results[node_name]
3629 test = msg and not res.offline
3630 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3631 "Communication failure in hooks execution: %s", msg)
3632 if res.offline or msg:
3633 # No need to investigate payload if node is offline or gave
3636 for script, hkr, output in res.payload:
3637 test = hkr == constants.HKR_FAIL
3638 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3639 "Script %s failed, output:", script)
3641 output = self._HOOKS_INDENT_RE.sub(" ", output)
3642 feedback_fn("%s" % output)
3648 class LUClusterVerifyDisks(NoHooksLU):
3649 """Verifies the cluster disks status.
3654 def ExpandNames(self):
3655 self.share_locks = _ShareAll()
3656 self.needed_locks = {
3657 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3660 def Exec(self, feedback_fn):
3661 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3663 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3664 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3665 for group in group_names])
3668 class LUGroupVerifyDisks(NoHooksLU):
3669 """Verifies the status of all disks in a node group.
3674 def ExpandNames(self):
3675 # Raises errors.OpPrereqError on its own if group can't be found
3676 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3678 self.share_locks = _ShareAll()
3679 self.needed_locks = {
3680 locking.LEVEL_INSTANCE: [],
3681 locking.LEVEL_NODEGROUP: [],
3682 locking.LEVEL_NODE: [],
3685 def DeclareLocks(self, level):
3686 if level == locking.LEVEL_INSTANCE:
3687 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3689 # Lock instances optimistically, needs verification once node and group
3690 # locks have been acquired
3691 self.needed_locks[locking.LEVEL_INSTANCE] = \
3692 self.cfg.GetNodeGroupInstances(self.group_uuid)
3694 elif level == locking.LEVEL_NODEGROUP:
3695 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3697 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3698 set([self.group_uuid] +
3699 # Lock all groups used by instances optimistically; this requires
3700 # going via the node before it's locked, requiring verification
3703 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3704 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3706 elif level == locking.LEVEL_NODE:
3707 # This will only lock the nodes in the group to be verified which contain
3709 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3710 self._LockInstancesNodes()
3712 # Lock all nodes in group to be verified
3713 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3714 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3715 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3717 def CheckPrereq(self):
3718 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3719 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3720 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3722 assert self.group_uuid in owned_groups
3724 # Check if locked instances are still correct
3725 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3727 # Get instance information
3728 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3730 # Check if node groups for locked instances are still correct
3731 _CheckInstancesNodeGroups(self.cfg, self.instances,
3732 owned_groups, owned_nodes, self.group_uuid)
3734 def Exec(self, feedback_fn):
3735 """Verify integrity of cluster disks.
3737 @rtype: tuple of three items
3738 @return: a tuple of (dict of node-to-node_error, list of instances
3739 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3744 res_instances = set()
3747 nv_dict = _MapInstanceDisksToNodes(
3748 [inst for inst in self.instances.values()
3749 if inst.admin_state == constants.ADMINST_UP])
3752 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3753 set(self.cfg.GetVmCapableNodeList()))
3755 node_lvs = self.rpc.call_lv_list(nodes, [])
3757 for (node, node_res) in node_lvs.items():
3758 if node_res.offline:
3761 msg = node_res.fail_msg
3763 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3764 res_nodes[node] = msg
3767 for lv_name, (_, _, lv_online) in node_res.payload.items():
3768 inst = nv_dict.pop((node, lv_name), None)
3769 if not (lv_online or inst is None):
3770 res_instances.add(inst)
3772 # any leftover items in nv_dict are missing LVs, let's arrange the data better
3774 for key, inst in nv_dict.iteritems():
3775 res_missing.setdefault(inst, []).append(list(key))
3777 return (res_nodes, list(res_instances), res_missing)
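# Example of the return value with hypothetical data: if node2 could not be
# contacted, instance "web1" has an LV that is not online, and "db1" is
# missing a volume on node1, Exec would return something like
#   ({"node2": "Error while ..."}, ["web1"],
#    {"db1": [["node1", "xenvg/disk0"]]})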
3780 class LUClusterRepairDiskSizes(NoHooksLU):
3781 """Verifies the cluster disks sizes.
3786 def ExpandNames(self):
3787 if self.op.instances:
3788 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3789 self.needed_locks = {
3790 locking.LEVEL_NODE_RES: [],
3791 locking.LEVEL_INSTANCE: self.wanted_names,
3793 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3795 self.wanted_names = None
3796 self.needed_locks = {
3797 locking.LEVEL_NODE_RES: locking.ALL_SET,
3798 locking.LEVEL_INSTANCE: locking.ALL_SET,
3800 self.share_locks = {
3801 locking.LEVEL_NODE_RES: 1,
3802 locking.LEVEL_INSTANCE: 0,
3805 def DeclareLocks(self, level):
3806 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3807 self._LockInstancesNodes(primary_only=True, level=level)
3809 def CheckPrereq(self):
3810 """Check prerequisites.
3812 This only checks the optional instance list against the existing names.
3815 if self.wanted_names is None:
3816 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3818 self.wanted_instances = \
3819 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3821 def _EnsureChildSizes(self, disk):
3822 """Ensure children of the disk have the needed disk size.
3824 This is valid mainly for DRBD8 and fixes an issue where the
3825 children have a smaller disk size than the parent.
3827 @param disk: an L{ganeti.objects.Disk} object
3830 if disk.dev_type == constants.LD_DRBD8:
3831 assert disk.children, "Empty children for DRBD8?"
3832 fchild = disk.children[0]
3833 mismatch = fchild.size < disk.size
3835 self.LogInfo("Child disk has size %d, parent %d, fixing",
3836 fchild.size, disk.size)
3837 fchild.size = disk.size
3839 # and we recurse on this child only, not on the metadev
3840 return self._EnsureChildSizes(fchild) or mismatch
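# Illustrative example (hypothetical sizes): for a DRBD8 disk of size 10240
# whose data child reports 10200, the child is grown to 10240 and the method
# returns True, which makes Exec below write the updated configuration.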
3844 def Exec(self, feedback_fn):
3845 """Verify the size of cluster disks.
3848 # TODO: check child disks too
3849 # TODO: check differences in size between primary/secondary nodes
3851 for instance in self.wanted_instances:
3852 pnode = instance.primary_node
3853 if pnode not in per_node_disks:
3854 per_node_disks[pnode] = []
3855 for idx, disk in enumerate(instance.disks):
3856 per_node_disks[pnode].append((instance, idx, disk))
3858 assert not (frozenset(per_node_disks.keys()) -
3859 self.owned_locks(locking.LEVEL_NODE_RES)), \
3860 "Not owning correct locks"
3861 assert not self.owned_locks(locking.LEVEL_NODE)
3864 for node, dskl in per_node_disks.items():
3865 newl = [v[2].Copy() for v in dskl]
3867 self.cfg.SetDiskID(dsk, node)
3868 result = self.rpc.call_blockdev_getsize(node, newl)
3870 self.LogWarning("Failure in blockdev_getsize call to node"
3871 " %s, ignoring", node)
3873 if len(result.payload) != len(dskl):
3874 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3875 " result.payload=%s", node, len(dskl), result.payload)
3876 self.LogWarning("Invalid result from node %s, ignoring node results",
3879 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3881 self.LogWarning("Disk %d of instance %s did not return size"
3882 " information, ignoring", idx, instance.name)
3884 if not isinstance(size, (int, long)):
3885 self.LogWarning("Disk %d of instance %s did not return valid"
3886 " size information, ignoring", idx, instance.name)
3889 if size != disk.size:
3890 self.LogInfo("Disk %d of instance %s has mismatched size,"
3891 " correcting: recorded %d, actual %d", idx,
3892 instance.name, disk.size, size)
3894 self.cfg.Update(instance, feedback_fn)
3895 changed.append((instance.name, idx, size))
3896 if self._EnsureChildSizes(disk):
3897 self.cfg.Update(instance, feedback_fn)
3898 changed.append((instance.name, idx, disk.size))
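# For reference, the "changed" list collected above holds one
# (instance name, disk index, new size) tuple per corrected disk, e.g.
# [("web1", 0, 10240)] with hypothetical values.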
3902 class LUClusterRename(LogicalUnit):
3903 """Rename the cluster.
3906 HPATH = "cluster-rename"
3907 HTYPE = constants.HTYPE_CLUSTER
3909 def BuildHooksEnv(self):
3914 "OP_TARGET": self.cfg.GetClusterName(),
3915 "NEW_NAME": self.op.name,
3918 def BuildHooksNodes(self):
3919 """Build hooks nodes.
3922 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3924 def CheckPrereq(self):
3925 """Verify that the passed name is a valid one.
3928 hostname = netutils.GetHostname(name=self.op.name,
3929 family=self.cfg.GetPrimaryIPFamily())
3931 new_name = hostname.name
3932 self.ip = new_ip = hostname.ip
3933 old_name = self.cfg.GetClusterName()
3934 old_ip = self.cfg.GetMasterIP()
3935 if new_name == old_name and new_ip == old_ip:
3936 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3937 " cluster has changed",
3939 if new_ip != old_ip:
3940 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3941 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3942 " reachable on the network" %
3943 new_ip, errors.ECODE_NOTUNIQUE)
3945 self.op.name = new_name
3947 def Exec(self, feedback_fn):
3948 """Rename the cluster.
3951 clustername = self.op.name
3954 # shutdown the master IP
3955 master_params = self.cfg.GetMasterNetworkParameters()
3956 ems = self.cfg.GetUseExternalMipScript()
3957 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3959 result.Raise("Could not disable the master role")
3962 cluster = self.cfg.GetClusterInfo()
3963 cluster.cluster_name = clustername
3964 cluster.master_ip = new_ip
3965 self.cfg.Update(cluster, feedback_fn)
3967 # update the known hosts file
3968 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3969 node_list = self.cfg.GetOnlineNodeList()
3971 node_list.remove(master_params.name)
3974 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3976 master_params.ip = new_ip
3977 result = self.rpc.call_node_activate_master_ip(master_params.name,
3979 msg = result.fail_msg
3981 self.LogWarning("Could not re-enable the master role on"
3982 " the master, please restart manually: %s", msg)
3987 def _ValidateNetmask(cfg, netmask):
3988 """Checks if a netmask is valid.
3990 @type cfg: L{config.ConfigWriter}
3991 @param cfg: The cluster configuration
3993 @param netmask: the netmask to be verified
3994 @raise errors.OpPrereqError: if the validation fails
3997 ip_family = cfg.GetPrimaryIPFamily()
3999 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4000 except errors.ProgrammerError:
4001 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4002 ip_family, errors.ECODE_INVAL)
4003 if not ipcls.ValidateNetmask(netmask):
4004 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4005 (netmask), errors.ECODE_INVAL)
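# Usage sketch (illustrative values): for an IPv4 cluster a CIDR prefix such
# as 24 is accepted, while an out-of-range value raises OpPrereqError:
#   _ValidateNetmask(cfg, 24)   # passes
#   _ValidateNetmask(cfg, 99)   # raises errors.OpPrereqError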
4008 class LUClusterSetParams(LogicalUnit):
4009 """Change the parameters of the cluster.
4012 HPATH = "cluster-modify"
4013 HTYPE = constants.HTYPE_CLUSTER
4016 def CheckArguments(self):
4020 if self.op.uid_pool:
4021 uidpool.CheckUidPool(self.op.uid_pool)
4023 if self.op.add_uids:
4024 uidpool.CheckUidPool(self.op.add_uids)
4026 if self.op.remove_uids:
4027 uidpool.CheckUidPool(self.op.remove_uids)
4029 if self.op.master_netmask is not None:
4030 _ValidateNetmask(self.cfg, self.op.master_netmask)
4032 if self.op.diskparams:
4033 for dt_params in self.op.diskparams.values():
4034 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4036 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4037 except errors.OpPrereqError, err:
4038 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4041 def ExpandNames(self):
4042 # FIXME: in the future maybe other cluster params won't require checking on
4043 # all nodes to be modified.
4044 self.needed_locks = {
4045 locking.LEVEL_NODE: locking.ALL_SET,
4046 locking.LEVEL_INSTANCE: locking.ALL_SET,
4047 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4049 self.share_locks = {
4050 locking.LEVEL_NODE: 1,
4051 locking.LEVEL_INSTANCE: 1,
4052 locking.LEVEL_NODEGROUP: 1,
4055 def BuildHooksEnv(self):
4060 "OP_TARGET": self.cfg.GetClusterName(),
4061 "NEW_VG_NAME": self.op.vg_name,
4064 def BuildHooksNodes(self):
4065 """Build hooks nodes.
4068 mn = self.cfg.GetMasterNode()
4071 def CheckPrereq(self):
4072 """Check prerequisites.
4074 This checks that the given parameters don't conflict and
4075 that the given volume group is valid.
4078 if self.op.vg_name is not None and not self.op.vg_name:
4079 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4080 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4081 " instances exist", errors.ECODE_INVAL)
4083 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4084 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4085 raise errors.OpPrereqError("Cannot disable drbd helper while"
4086 " drbd-based instances exist",
4089 node_list = self.owned_locks(locking.LEVEL_NODE)
4091 # if vg_name not None, checks given volume group on all nodes
4093 vglist = self.rpc.call_vg_list(node_list)
4094 for node in node_list:
4095 msg = vglist[node].fail_msg
4097 # ignoring down node
4098 self.LogWarning("Error while gathering data on node %s"
4099 " (ignoring node): %s", node, msg)
4101 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4103 constants.MIN_VG_SIZE)
4105 raise errors.OpPrereqError("Error on node '%s': %s" %
4106 (node, vgstatus), errors.ECODE_ENVIRON)
4108 if self.op.drbd_helper:
4109 # checks given drbd helper on all nodes
4110 helpers = self.rpc.call_drbd_helper(node_list)
4111 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4113 self.LogInfo("Not checking drbd helper on offline node %s", node)
4115 msg = helpers[node].fail_msg
4117 raise errors.OpPrereqError("Error checking drbd helper on node"
4118 " '%s': %s" % (node, msg),
4119 errors.ECODE_ENVIRON)
4120 node_helper = helpers[node].payload
4121 if node_helper != self.op.drbd_helper:
4122 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4123 (node, node_helper), errors.ECODE_ENVIRON)
4125 self.cluster = cluster = self.cfg.GetClusterInfo()
4126 # validate params changes
4127 if self.op.beparams:
4128 objects.UpgradeBeParams(self.op.beparams)
4129 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4130 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4132 if self.op.ndparams:
4133 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4134 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4136 # TODO: we need a more general way to handle resetting
4137 # cluster-level parameters to default values
4138 if self.new_ndparams["oob_program"] == "":
4139 self.new_ndparams["oob_program"] = \
4140 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4142 if self.op.hv_state:
4143 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4144 self.cluster.hv_state_static)
4145 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4146 for hv, values in new_hv_state.items())
4148 if self.op.disk_state:
4149 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4150 self.cluster.disk_state_static)
4151 self.new_disk_state = \
4152 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4153 for name, values in svalues.items()))
4154 for storage, svalues in new_disk_state.items())
4157 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4160 all_instances = self.cfg.GetAllInstancesInfo().values()
4162 for group in self.cfg.GetAllNodeGroupsInfo().values():
4163 instances = frozenset([inst for inst in all_instances
4164 if compat.any(node in group.members
4165 for node in inst.all_nodes)])
4166 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4167 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4168 new = _ComputeNewInstanceViolations(ipol,
4169 new_ipolicy, instances)
4171 violations.update(new)
4174 self.LogWarning("After the ipolicy change the following instances"
4175 " violate them: %s",
4176 utils.CommaJoin(utils.NiceSort(violations)))
4178 if self.op.nicparams:
4179 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4180 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4181 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4184 # check all instances for consistency
4185 for instance in self.cfg.GetAllInstancesInfo().values():
4186 for nic_idx, nic in enumerate(instance.nics):
4187 params_copy = copy.deepcopy(nic.nicparams)
4188 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4190 # check parameter syntax
4192 objects.NIC.CheckParameterSyntax(params_filled)
4193 except errors.ConfigurationError, err:
4194 nic_errors.append("Instance %s, nic/%d: %s" %
4195 (instance.name, nic_idx, err))
4197 # if we're moving instances to routed, check that they have an ip
4198 target_mode = params_filled[constants.NIC_MODE]
4199 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4200 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4201 " address" % (instance.name, nic_idx))
4203 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4204 "\n".join(nic_errors), errors.ECODE_INVAL)
4206 # hypervisor list/parameters
4207 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4208 if self.op.hvparams:
4209 for hv_name, hv_dict in self.op.hvparams.items():
4210 if hv_name not in self.new_hvparams:
4211 self.new_hvparams[hv_name] = hv_dict
4213 self.new_hvparams[hv_name].update(hv_dict)
4215 # disk template parameters
4216 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4217 if self.op.diskparams:
4218 for dt_name, dt_params in self.op.diskparams.items():
4219 if dt_name not in self.new_diskparams:
4220 self.new_diskparams[dt_name] = dt_params
4222 self.new_diskparams[dt_name].update(dt_params)
4224 # os hypervisor parameters
4225 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4227 for os_name, hvs in self.op.os_hvp.items():
4228 if os_name not in self.new_os_hvp:
4229 self.new_os_hvp[os_name] = hvs
4231 for hv_name, hv_dict in hvs.items():
4232 if hv_name not in self.new_os_hvp[os_name]:
4233 self.new_os_hvp[os_name][hv_name] = hv_dict
4235 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4238 self.new_osp = objects.FillDict(cluster.osparams, {})
4239 if self.op.osparams:
4240 for os_name, osp in self.op.osparams.items():
4241 if os_name not in self.new_osp:
4242 self.new_osp[os_name] = {}
4244 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4247 if not self.new_osp[os_name]:
4248 # we removed all parameters
4249 del self.new_osp[os_name]
4251 # check the parameter validity (remote check)
4252 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4253 os_name, self.new_osp[os_name])
4255 # changes to the hypervisor list
4256 if self.op.enabled_hypervisors is not None:
4257 self.hv_list = self.op.enabled_hypervisors
4258 for hv in self.hv_list:
4259 # if the hypervisor doesn't already exist in the cluster
4260 # hvparams, we initialize it to empty, and then (in both
4261 # cases) we make sure to fill the defaults, as we might not
4262 # have a complete defaults list if the hypervisor wasn't enabled before
4264 if hv not in new_hvp:
4266 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4267 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4269 self.hv_list = cluster.enabled_hypervisors
4271 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4272 # either the enabled list has changed, or the parameters have, validate
4273 for hv_name, hv_params in self.new_hvparams.items():
4274 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4275 (self.op.enabled_hypervisors and
4276 hv_name in self.op.enabled_hypervisors)):
4277 # either this is a new hypervisor, or its parameters have changed
4278 hv_class = hypervisor.GetHypervisor(hv_name)
4279 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4280 hv_class.CheckParameterSyntax(hv_params)
4281 _CheckHVParams(self, node_list, hv_name, hv_params)
4284 # no need to check any newly-enabled hypervisors, since the
4285 # defaults have already been checked in the above code-block
4286 for os_name, os_hvp in self.new_os_hvp.items():
4287 for hv_name, hv_params in os_hvp.items():
4288 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4289 # we need to fill in the new os_hvp on top of the actual hv_p
4290 cluster_defaults = self.new_hvparams.get(hv_name, {})
4291 new_osp = objects.FillDict(cluster_defaults, hv_params)
4292 hv_class = hypervisor.GetHypervisor(hv_name)
4293 hv_class.CheckParameterSyntax(new_osp)
4294 _CheckHVParams(self, node_list, hv_name, new_osp)
4296 if self.op.default_iallocator:
4297 alloc_script = utils.FindFile(self.op.default_iallocator,
4298 constants.IALLOCATOR_SEARCH_PATH,
4300 if alloc_script is None:
4301 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4302 " specified" % self.op.default_iallocator,
4305 def Exec(self, feedback_fn):
4306 """Change the parameters of the cluster.
4309 if self.op.vg_name is not None:
4310 new_volume = self.op.vg_name
4313 if new_volume != self.cfg.GetVGName():
4314 self.cfg.SetVGName(new_volume)
4316 feedback_fn("Cluster LVM configuration already in desired"
4317 " state, not changing")
4318 if self.op.drbd_helper is not None:
4319 new_helper = self.op.drbd_helper
4322 if new_helper != self.cfg.GetDRBDHelper():
4323 self.cfg.SetDRBDHelper(new_helper)
4325 feedback_fn("Cluster DRBD helper already in desired state,"
4327 if self.op.hvparams:
4328 self.cluster.hvparams = self.new_hvparams
4330 self.cluster.os_hvp = self.new_os_hvp
4331 if self.op.enabled_hypervisors is not None:
4332 self.cluster.hvparams = self.new_hvparams
4333 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4334 if self.op.beparams:
4335 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4336 if self.op.nicparams:
4337 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4339 self.cluster.ipolicy = self.new_ipolicy
4340 if self.op.osparams:
4341 self.cluster.osparams = self.new_osp
4342 if self.op.ndparams:
4343 self.cluster.ndparams = self.new_ndparams
4344 if self.op.diskparams:
4345 self.cluster.diskparams = self.new_diskparams
4346 if self.op.hv_state:
4347 self.cluster.hv_state_static = self.new_hv_state
4348 if self.op.disk_state:
4349 self.cluster.disk_state_static = self.new_disk_state
4351 if self.op.candidate_pool_size is not None:
4352 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4353 # we need to update the pool size here, otherwise the save will fail
4354 _AdjustCandidatePool(self, [])
4356 if self.op.maintain_node_health is not None:
4357 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4358 feedback_fn("Note: CONFD was disabled at build time, node health"
4359 " maintenance is not useful (still enabling it)")
4360 self.cluster.maintain_node_health = self.op.maintain_node_health
4362 if self.op.prealloc_wipe_disks is not None:
4363 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4365 if self.op.add_uids is not None:
4366 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4368 if self.op.remove_uids is not None:
4369 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4371 if self.op.uid_pool is not None:
4372 self.cluster.uid_pool = self.op.uid_pool
4374 if self.op.default_iallocator is not None:
4375 self.cluster.default_iallocator = self.op.default_iallocator
4377 if self.op.reserved_lvs is not None:
4378 self.cluster.reserved_lvs = self.op.reserved_lvs
4380 if self.op.use_external_mip_script is not None:
4381 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4383 def helper_os(aname, mods, desc):
4385 lst = getattr(self.cluster, aname)
4386 for key, val in mods:
4387 if key == constants.DDM_ADD:
4389 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4392 elif key == constants.DDM_REMOVE:
4396 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4398 raise errors.ProgrammerError("Invalid modification '%s'" % key)
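# Sketch of the expected "mods" format (hypothetical values): hidden_os and
# blacklisted_os are lists of (action, OS name) pairs, e.g.
#   [(constants.DDM_ADD, "debootstrap"), (constants.DDM_REMOVE, "old-image")]
# which helper_os applies to cluster.hidden_os / cluster.blacklisted_os.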
4400 if self.op.hidden_os:
4401 helper_os("hidden_os", self.op.hidden_os, "hidden")
4403 if self.op.blacklisted_os:
4404 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4406 if self.op.master_netdev:
4407 master_params = self.cfg.GetMasterNetworkParameters()
4408 ems = self.cfg.GetUseExternalMipScript()
4409 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4410 self.cluster.master_netdev)
4411 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4413 result.Raise("Could not disable the master ip")
4414 feedback_fn("Changing master_netdev from %s to %s" %
4415 (master_params.netdev, self.op.master_netdev))
4416 self.cluster.master_netdev = self.op.master_netdev
4418 if self.op.master_netmask:
4419 master_params = self.cfg.GetMasterNetworkParameters()
4420 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4421 result = self.rpc.call_node_change_master_netmask(master_params.name,
4422 master_params.netmask,
4423 self.op.master_netmask,
4425 master_params.netdev)
4427 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4430 self.cluster.master_netmask = self.op.master_netmask
4432 self.cfg.Update(self.cluster, feedback_fn)
4434 if self.op.master_netdev:
4435 master_params = self.cfg.GetMasterNetworkParameters()
4436 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4437 self.op.master_netdev)
4438 ems = self.cfg.GetUseExternalMipScript()
4439 result = self.rpc.call_node_activate_master_ip(master_params.name,
4442 self.LogWarning("Could not re-enable the master ip on"
4443 " the master, please restart manually: %s",
4447 def _UploadHelper(lu, nodes, fname):
4448 """Helper for uploading a file and showing warnings.
4451 if os.path.exists(fname):
4452 result = lu.rpc.call_upload_file(nodes, fname)
4453 for to_node, to_result in result.items():
4454 msg = to_result.fail_msg
4456 msg = ("Copy of file %s to node %s failed: %s" %
4457 (fname, to_node, msg))
4461 def _ComputeAncillaryFiles(cluster, redist):
4462 """Compute files external to Ganeti which need to be consistent.
4464 @type redist: boolean
4465 @param redist: Whether to include files which need to be redistributed
4468 # Compute files for all nodes
4470 pathutils.SSH_KNOWN_HOSTS_FILE,
4471 pathutils.CONFD_HMAC_KEY,
4472 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4473 pathutils.SPICE_CERT_FILE,
4474 pathutils.SPICE_CACERT_FILE,
4475 pathutils.RAPI_USERS_FILE,
4479 # we need to ship at least the RAPI certificate
4480 files_all.add(pathutils.RAPI_CERT_FILE)
4482 files_all.update(pathutils.ALL_CERT_FILES)
4483 files_all.update(ssconf.SimpleStore().GetFileList())
4485 if cluster.modify_etc_hosts:
4486 files_all.add(pathutils.ETC_HOSTS)
4488 if cluster.use_external_mip_script:
4489 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4491 # Files which are optional, these must:
4492 # - be present in one other category as well
4493 # - either exist or not exist on all nodes of that category (mc, vm all)
4495 pathutils.RAPI_USERS_FILE,
4498 # Files which should only be on master candidates
4502 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4506 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4507 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4508 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4510 # Files which should only be on VM-capable nodes
4513 for hv_name in cluster.enabled_hypervisors
4514 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4518 for hv_name in cluster.enabled_hypervisors
4519 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4521 # Filenames in each category must be unique
4522 all_files_set = files_all | files_mc | files_vm
4523 assert (len(all_files_set) ==
4524 sum(map(len, [files_all, files_mc, files_vm]))), \
4525 "Found file listed in more than one file list"
4527 # Optional files must be present in one other category
4528 assert all_files_set.issuperset(files_opt), \
4529 "Optional file not in a different required list"
4531 # This one file should never ever be re-distributed via RPC
4532 assert not (redist and
4533 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4535 return (files_all, files_opt, files_mc, files_vm)
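# Rough shape of the result (actual contents depend on build options and
# cluster settings):
#   files_all - files every node must have, e.g. the ssh known_hosts file,
#               the confd HMAC key and the RAPI certificate
#   files_opt - files that may legitimately be absent, e.g. the RAPI users
#               file; each of them also appears in one of the other sets
#   files_mc  - files for master candidates only, e.g. the cluster config
#   files_vm  - hypervisor ancillary files for VM-capable nodes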
4538 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4539 """Distribute additional files which are part of the cluster configuration.
4541 ConfigWriter takes care of distributing the config and ssconf files, but
4542 there are more files which should be distributed to all nodes. This function
4543 makes sure those are copied.
4545 @param lu: calling logical unit
4546 @param additional_nodes: list of nodes not in the config to distribute to
4547 @type additional_vm: boolean
4548 @param additional_vm: whether the additional nodes are vm-capable or not
4551 # Gather target nodes
4552 cluster = lu.cfg.GetClusterInfo()
4553 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4555 online_nodes = lu.cfg.GetOnlineNodeList()
4556 online_set = frozenset(online_nodes)
4557 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4559 if additional_nodes is not None:
4560 online_nodes.extend(additional_nodes)
4562 vm_nodes.extend(additional_nodes)
4564 # Never distribute to master node
4565 for nodelist in [online_nodes, vm_nodes]:
4566 if master_info.name in nodelist:
4567 nodelist.remove(master_info.name)
4570 (files_all, _, files_mc, files_vm) = \
4571 _ComputeAncillaryFiles(cluster, True)
4573 # Never re-distribute configuration file from here
4574 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4575 pathutils.CLUSTER_CONF_FILE in files_vm)
4576 assert not files_mc, "Master candidates not handled in this function"
4579 (online_nodes, files_all),
4580 (vm_nodes, files_vm),
4584 for (node_list, files) in filemap:
4586 _UploadHelper(lu, node_list, fname)
4589 class LUClusterRedistConf(NoHooksLU):
4590 """Force the redistribution of cluster configuration.
4592 This is a very simple LU.
4597 def ExpandNames(self):
4598 self.needed_locks = {
4599 locking.LEVEL_NODE: locking.ALL_SET,
4601 self.share_locks[locking.LEVEL_NODE] = 1
4603 def Exec(self, feedback_fn):
4604 """Redistribute the configuration.
4607 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4608 _RedistributeAncillaryFiles(self)
4611 class LUClusterActivateMasterIp(NoHooksLU):
4612 """Activate the master IP on the master node.
4615 def Exec(self, feedback_fn):
4616 """Activate the master IP.
4619 master_params = self.cfg.GetMasterNetworkParameters()
4620 ems = self.cfg.GetUseExternalMipScript()
4621 result = self.rpc.call_node_activate_master_ip(master_params.name,
4623 result.Raise("Could not activate the master IP")
4626 class LUClusterDeactivateMasterIp(NoHooksLU):
4627 """Deactivate the master IP on the master node.
4630 def Exec(self, feedback_fn):
4631 """Deactivate the master IP.
4634 master_params = self.cfg.GetMasterNetworkParameters()
4635 ems = self.cfg.GetUseExternalMipScript()
4636 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4638 result.Raise("Could not deactivate the master IP")
4641 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4642 """Sleep and poll for an instance's disk to sync.
4645 if not instance.disks or disks is not None and not disks:
4648 disks = _ExpandCheckDisks(instance, disks)
4651 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4653 node = instance.primary_node
4656 lu.cfg.SetDiskID(dev, node)
4658 # TODO: Convert to utils.Retry
4661 degr_retries = 10 # in seconds, as we sleep 1 second each time
4665 cumul_degraded = False
4666 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4667 msg = rstats.fail_msg
4669 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4672 raise errors.RemoteError("Can't contact node %s for mirror data,"
4673 " aborting." % node)
4676 rstats = rstats.payload
4678 for i, mstat in enumerate(rstats):
4680 lu.LogWarning("Can't compute data for node %s/%s",
4681 node, disks[i].iv_name)
4684 cumul_degraded = (cumul_degraded or
4685 (mstat.is_degraded and mstat.sync_percent is None))
4686 if mstat.sync_percent is not None:
4688 if mstat.estimated_time is not None:
4689 rem_time = ("%s remaining (estimated)" %
4690 utils.FormatSeconds(mstat.estimated_time))
4691 max_time = mstat.estimated_time
4693 rem_time = "no time estimate"
4694 lu.LogInfo("- device %s: %5.2f%% done, %s",
4695 disks[i].iv_name, mstat.sync_percent, rem_time)
4697 # if we're done but degraded, let's do a few small retries, to
4698 # make sure we see a stable and not transient situation; therefore
4699 # we force restart of the loop
4700 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4701 logging.info("Degraded disks found, %d retries left", degr_retries)
4709 time.sleep(min(60, max_time))
4712 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4714 return not cumul_degraded
4717 def _BlockdevFind(lu, node, dev, instance):
4718 """Wrapper around call_blockdev_find to annotate diskparams.
4720 @param lu: A reference to the lu object
4721 @param node: The node to call out
4722 @param dev: The device to find
4723 @param instance: The instance object the device belongs to
4724 @return: the result of the RPC call
4727 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4728 return lu.rpc.call_blockdev_find(node, disk)
4731 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4732 """Wrapper around L{_CheckDiskConsistencyInner}.
4735 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4736 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4740 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4742 """Check that mirrors are not degraded.
4744 @attention: The device has to be annotated already.
4746 The ldisk parameter, if True, will change the test from the
4747 is_degraded attribute (which represents overall non-ok status for
4748 the device(s)) to the ldisk (representing the local storage status).
4751 lu.cfg.SetDiskID(dev, node)
4755 if on_primary or dev.AssembleOnSecondary():
4756 rstats = lu.rpc.call_blockdev_find(node, dev)
4757 msg = rstats.fail_msg
4759 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4761 elif not rstats.payload:
4762 lu.LogWarning("Can't find disk on node %s", node)
4766 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4768 result = result and not rstats.payload.is_degraded
4771 for child in dev.children:
4772 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4778 class LUOobCommand(NoHooksLU):
4779 """Logical unit for OOB handling.
4783 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4785 def ExpandNames(self):
4786 """Gather locks we need.
4789 if self.op.node_names:
4790 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4791 lock_names = self.op.node_names
4793 lock_names = locking.ALL_SET
4795 self.needed_locks = {
4796 locking.LEVEL_NODE: lock_names,
4799 def CheckPrereq(self):
4800 """Check prerequisites.
4803 - the node exists in the configuration
4806 Any errors are signaled by raising errors.OpPrereqError.
4810 self.master_node = self.cfg.GetMasterNode()
4812 assert self.op.power_delay >= 0.0
4814 if self.op.node_names:
4815 if (self.op.command in self._SKIP_MASTER and
4816 self.master_node in self.op.node_names):
4817 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4818 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4820 if master_oob_handler:
4821 additional_text = ("run '%s %s %s' if you want to operate on the"
4822 " master regardless") % (master_oob_handler,
4826 additional_text = "it does not support out-of-band operations"
4828 raise errors.OpPrereqError(("Operating on the master node %s is not"
4829 " allowed for %s; %s") %
4830 (self.master_node, self.op.command,
4831 additional_text), errors.ECODE_INVAL)
4833 self.op.node_names = self.cfg.GetNodeList()
4834 if self.op.command in self._SKIP_MASTER:
4835 self.op.node_names.remove(self.master_node)
4837 if self.op.command in self._SKIP_MASTER:
4838 assert self.master_node not in self.op.node_names
4840 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4842 raise errors.OpPrereqError("Node %s not found" % node_name,
4845 self.nodes.append(node)
4847 if (not self.op.ignore_status and
4848 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4849 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4850 " not marked offline") % node_name,
4853 def Exec(self, feedback_fn):
4854 """Execute OOB and return result if we expect any.
4857 master_node = self.master_node
4860 for idx, node in enumerate(utils.NiceSort(self.nodes,
4861 key=lambda node: node.name)):
4862 node_entry = [(constants.RS_NORMAL, node.name)]
4863 ret.append(node_entry)
4865 oob_program = _SupportsOob(self.cfg, node)
4868 node_entry.append((constants.RS_UNAVAIL, None))
4871 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4872 self.op.command, oob_program, node.name)
4873 result = self.rpc.call_run_oob(master_node, oob_program,
4874 self.op.command, node.name,
4878 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4879 node.name, result.fail_msg)
4880 node_entry.append((constants.RS_NODATA, None))
4883 self._CheckPayload(result)
4884 except errors.OpExecError, err:
4885 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4887 node_entry.append((constants.RS_NODATA, None))
4889 if self.op.command == constants.OOB_HEALTH:
4890 # For health we should log important events
4891 for item, status in result.payload:
4892 if status in [constants.OOB_STATUS_WARNING,
4893 constants.OOB_STATUS_CRITICAL]:
4894 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4895 item, node.name, status)
4897 if self.op.command == constants.OOB_POWER_ON:
4899 elif self.op.command == constants.OOB_POWER_OFF:
4900 node.powered = False
4901 elif self.op.command == constants.OOB_POWER_STATUS:
4902 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4903 if powered != node.powered:
4904 logging.warning(("Recorded power state (%s) of node '%s' does not"
4905 " match actual power state (%s)"), node.powered,
4908 # For configuration changing commands we should update the node
4909 if self.op.command in (constants.OOB_POWER_ON,
4910 constants.OOB_POWER_OFF):
4911 self.cfg.Update(node, feedback_fn)
4913 node_entry.append((constants.RS_NORMAL, result.payload))
4915 if (self.op.command == constants.OOB_POWER_ON and
4916 idx < len(self.nodes) - 1):
4917 time.sleep(self.op.power_delay)
4921 def _CheckPayload(self, result):
4922 """Checks if the payload is valid.
4924 @param result: RPC result
4925 @raises errors.OpExecError: If payload is not valid
4929 if self.op.command == constants.OOB_HEALTH:
4930 if not isinstance(result.payload, list):
4931 errs.append("command 'health' is expected to return a list but got %s" %
4932 type(result.payload))
4934 for item, status in result.payload:
4935 if status not in constants.OOB_STATUSES:
4936 errs.append("health item '%s' has invalid status '%s'" %
4939 if self.op.command == constants.OOB_POWER_STATUS:
4940 if not isinstance(result.payload, dict):
4941 errs.append("power-status is expected to return a dict but got %s" %
4942 type(result.payload))
4944 if self.op.command in [
4945 constants.OOB_POWER_ON,
4946 constants.OOB_POWER_OFF,
4947 constants.OOB_POWER_CYCLE,
4949 if result.payload is not None:
4950 errs.append("%s is not expected to return a payload but got '%s'" %
4951 (self.op.command, result.payload))
4954 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4955 utils.CommaJoin(errs))
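# Payload shapes checked above, shown with hypothetical values:
#   OOB_HEALTH:        [("temperature", constants.OOB_STATUS_WARNING), ...]
#   OOB_POWER_STATUS:  {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE: no payload (None) is expected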
4958 class _OsQuery(_QueryBase):
4959 FIELDS = query.OS_FIELDS
4961 def ExpandNames(self, lu):
4962 # Lock all nodes in shared mode
4963 # Temporary removal of locks, should be reverted later
4964 # TODO: reintroduce locks when they are lighter-weight
4965 lu.needed_locks = {}
4966 #self.share_locks[locking.LEVEL_NODE] = 1
4967 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4969 # The following variables interact with _QueryBase._GetNames
4971 self.wanted = self.names
4973 self.wanted = locking.ALL_SET
4975 self.do_locking = self.use_locking
4977 def DeclareLocks(self, lu, level):
4981 def _DiagnoseByOS(rlist):
4982 """Remaps a per-node return list into an a per-os per-node dictionary
4984 @param rlist: a map with node names as keys and OS objects as values
4987 @return: a dictionary with osnames as keys and as value another
4988 map, with nodes as keys and tuples of (path, status, diagnose,
4989 variants, parameters, api_versions) as values, eg::
4991 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4992 (/srv/..., False, "invalid api")],
4993 "node2": [(/srv/..., True, "", [], [])]}
4998 # we build here the list of nodes that didn't fail the RPC (at RPC
4999 # level), so that nodes with a non-responding node daemon don't
5000 # make all OSes invalid
5001 good_nodes = [node_name for node_name in rlist
5002 if not rlist[node_name].fail_msg]
5003 for node_name, nr in rlist.items():
5004 if nr.fail_msg or not nr.payload:
5006 for (name, path, status, diagnose, variants,
5007 params, api_versions) in nr.payload:
5008 if name not in all_os:
5009 # build a list of nodes for this os containing empty lists
5010 # for each node in node_list
5012 for nname in good_nodes:
5013 all_os[name][nname] = []
5014 # convert params from [name, help] to (name, help)
5015 params = [tuple(v) for v in params]
5016 all_os[name][node_name].append((path, status, diagnose,
5017 variants, params, api_versions))
5020 def _GetQueryData(self, lu):
5021 """Computes the list of nodes and their attributes.
5024 # Locking is not used
5025 assert not (compat.any(lu.glm.is_owned(level)
5026 for level in locking.LEVELS
5027 if level != locking.LEVEL_CLUSTER) or
5028 self.do_locking or self.use_locking)
5030 valid_nodes = [node.name
5031 for node in lu.cfg.GetAllNodesInfo().values()
5032 if not node.offline and node.vm_capable]
5033 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5034 cluster = lu.cfg.GetClusterInfo()
5038 for (os_name, os_data) in pol.items():
5039 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5040 hidden=(os_name in cluster.hidden_os),
5041 blacklisted=(os_name in cluster.blacklisted_os))
5045 api_versions = set()
5047 for idx, osl in enumerate(os_data.values()):
5048 info.valid = bool(info.valid and osl and osl[0][1])
5052 (node_variants, node_params, node_api) = osl[0][3:6]
5055 variants.update(node_variants)
5056 parameters.update(node_params)
5057 api_versions.update(node_api)
5059 # Filter out inconsistent values
5060 variants.intersection_update(node_variants)
5061 parameters.intersection_update(node_params)
5062 api_versions.intersection_update(node_api)
5064 info.variants = list(variants)
5065 info.parameters = list(parameters)
5066 info.api_versions = list(api_versions)
5068 data[os_name] = info
5070 # Prepare data in requested order
5071 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5075 class LUOsDiagnose(NoHooksLU):
5076 """Logical unit for OS diagnose/query.
5082 def _BuildFilter(fields, names):
5083 """Builds a filter for querying OSes.
5086 name_filter = qlang.MakeSimpleFilter("name", names)
5088 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5089 # respective field is not requested
5090 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5091 for fname in ["hidden", "blacklisted"]
5092 if fname not in fields]
5093 if "valid" not in fields:
5094 status_filter.append([qlang.OP_TRUE, "valid"])
5097 status_filter.insert(0, qlang.OP_AND)
5099 status_filter = None
5101 if name_filter and status_filter:
5102 return [qlang.OP_AND, name_filter, status_filter]
5106 return status_filter
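# Sketch (hypothetical arguments): _BuildFilter(["name", "valid"], ["lenny"])
# yields a filter roughly equivalent to
#   [qlang.OP_AND,
#    qlang.MakeSimpleFilter("name", ["lenny"]),
#    [qlang.OP_AND, [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]]]]
# i.e. hidden and blacklisted OSes stay filtered out unless those fields are
# requested explicitly.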
5108 def CheckArguments(self):
5109 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5110 self.op.output_fields, False)
5112 def ExpandNames(self):
5113 self.oq.ExpandNames(self)
5115 def Exec(self, feedback_fn):
5116 return self.oq.OldStyleQuery(self)
5119 class LUNodeRemove(LogicalUnit):
5120 """Logical unit for removing a node.
5123 HPATH = "node-remove"
5124 HTYPE = constants.HTYPE_NODE
5126 def BuildHooksEnv(self):
5131 "OP_TARGET": self.op.node_name,
5132 "NODE_NAME": self.op.node_name,
5135 def BuildHooksNodes(self):
5136 """Build hooks nodes.
5138 This doesn't run on the target node in the pre phase as a failed
5139 node would then be impossible to remove.
5142 all_nodes = self.cfg.GetNodeList()
5144 all_nodes.remove(self.op.node_name)
5147 return (all_nodes, all_nodes)
5149 def CheckPrereq(self):
5150 """Check prerequisites.
5153 - the node exists in the configuration
5154 - it does not have primary or secondary instances
5155 - it's not the master
5157 Any errors are signaled by raising errors.OpPrereqError.
5160 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5161 node = self.cfg.GetNodeInfo(self.op.node_name)
5162 assert node is not None
5164 masternode = self.cfg.GetMasterNode()
5165 if node.name == masternode:
5166 raise errors.OpPrereqError("Node is the master node, failover to another"
5167 " node is required", errors.ECODE_INVAL)
5169 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5170 if node.name in instance.all_nodes:
5171 raise errors.OpPrereqError("Instance %s is still running on the node,"
5172 " please remove first" % instance_name,
5174 self.op.node_name = node.name
5177 def Exec(self, feedback_fn):
5178 """Removes the node from the cluster.
5182 logging.info("Stopping the node daemon and removing configs from node %s",
5185 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5187 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5190 # Promote nodes to master candidate as needed
5191 _AdjustCandidatePool(self, exceptions=[node.name])
5192 self.context.RemoveNode(node.name)
5194 # Run post hooks on the node before it's removed
5195 _RunPostHook(self, node.name)
5197 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5198 msg = result.fail_msg
5200 self.LogWarning("Errors encountered on the remote node while leaving"
5201 " the cluster: %s", msg)
5203 # Remove node from our /etc/hosts
5204 if self.cfg.GetClusterInfo().modify_etc_hosts:
5205 master_node = self.cfg.GetMasterNode()
5206 result = self.rpc.call_etc_hosts_modify(master_node,
5207 constants.ETC_HOSTS_REMOVE,
5209 result.Raise("Can't update hosts file with new host data")
5210 _RedistributeAncillaryFiles(self)
5213 class _NodeQuery(_QueryBase):
5214 FIELDS = query.NODE_FIELDS
5216 def ExpandNames(self, lu):
5217 lu.needed_locks = {}
5218 lu.share_locks = _ShareAll()
5221 self.wanted = _GetWantedNodes(lu, self.names)
5223 self.wanted = locking.ALL_SET
5225 self.do_locking = (self.use_locking and
5226 query.NQ_LIVE in self.requested_data)
5229 # If any non-static field is requested we need to lock the nodes
5230 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5232 def DeclareLocks(self, lu, level):
5235 def _GetQueryData(self, lu):
5236 """Computes the list of nodes and their attributes.
5239 all_info = lu.cfg.GetAllNodesInfo()
5241 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5243 # Gather data as requested
5244 if query.NQ_LIVE in self.requested_data:
5245 # filter out non-vm_capable nodes
5246 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5248 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5249 [lu.cfg.GetHypervisorType()])
5250 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5251 for (name, nresult) in node_data.items()
5252 if not nresult.fail_msg and nresult.payload)
5256 if query.NQ_INST in self.requested_data:
5257 node_to_primary = dict([(name, set()) for name in nodenames])
5258 node_to_secondary = dict([(name, set()) for name in nodenames])
5260 inst_data = lu.cfg.GetAllInstancesInfo()
5262 for inst in inst_data.values():
5263 if inst.primary_node in node_to_primary:
5264 node_to_primary[inst.primary_node].add(inst.name)
5265 for secnode in inst.secondary_nodes:
5266 if secnode in node_to_secondary:
5267 node_to_secondary[secnode].add(inst.name)
5269 node_to_primary = None
5270 node_to_secondary = None
5272 if query.NQ_OOB in self.requested_data:
5273 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5274 for name, node in all_info.iteritems())
5278 if query.NQ_GROUP in self.requested_data:
5279 groups = lu.cfg.GetAllNodeGroupsInfo()
5283 return query.NodeQueryData([all_info[name] for name in nodenames],
5284 live_data, lu.cfg.GetMasterNode(),
5285 node_to_primary, node_to_secondary, groups,
5286 oob_support, lu.cfg.GetClusterInfo())
5289 class LUNodeQuery(NoHooksLU):
5290 """Logical unit for querying nodes.
5293 # pylint: disable=W0142
5296 def CheckArguments(self):
5297 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5298 self.op.output_fields, self.op.use_locking)
5300 def ExpandNames(self):
5301 self.nq.ExpandNames(self)
5303 def DeclareLocks(self, level):
5304 self.nq.DeclareLocks(self, level)
5306 def Exec(self, feedback_fn):
5307 return self.nq.OldStyleQuery(self)
5310 class LUNodeQueryvols(NoHooksLU):
5311 """Logical unit for getting volumes on node(s).
5315 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5316 _FIELDS_STATIC = utils.FieldSet("node")
5318 def CheckArguments(self):
5319 _CheckOutputFields(static=self._FIELDS_STATIC,
5320 dynamic=self._FIELDS_DYNAMIC,
5321 selected=self.op.output_fields)
5323 def ExpandNames(self):
5324 self.share_locks = _ShareAll()
5325 self.needed_locks = {}
5327 if not self.op.nodes:
5328 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5330 self.needed_locks[locking.LEVEL_NODE] = \
5331 _GetWantedNodes(self, self.op.nodes)
5333 def Exec(self, feedback_fn):
5334 """Computes the list of nodes and their attributes.
5337 nodenames = self.owned_locks(locking.LEVEL_NODE)
5338 volumes = self.rpc.call_node_volumes(nodenames)
5340 ilist = self.cfg.GetAllInstancesInfo()
5341 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5344 for node in nodenames:
5345 nresult = volumes[node]
5348 msg = nresult.fail_msg
5350 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5353 node_vols = sorted(nresult.payload,
5354 key=operator.itemgetter("dev"))
5356 for vol in node_vols:
5358 for field in self.op.output_fields:
5361 elif field == "phys":
5365 elif field == "name":
5367 elif field == "size":
5368 val = int(float(vol["size"]))
5369 elif field == "instance":
5370 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5372 raise errors.ParameterError(field)
5373 node_output.append(str(val))
5375 output.append(node_output)
5380 class LUNodeQueryStorage(NoHooksLU):
5381 """Logical unit for getting information on storage units on node(s).
5384 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5387 def CheckArguments(self):
5388 _CheckOutputFields(static=self._FIELDS_STATIC,
5389 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5390 selected=self.op.output_fields)
5392 def ExpandNames(self):
5393 self.share_locks = _ShareAll()
5396 self.needed_locks = {
5397 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5400 self.needed_locks = {
5401 locking.LEVEL_NODE: locking.ALL_SET,
5402 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5405 def Exec(self, feedback_fn):
5406 """Computes the list of nodes and their attributes.
5409 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5411 # Always get name to sort by
5412 if constants.SF_NAME in self.op.output_fields:
5413 fields = self.op.output_fields[:]
5415 fields = [constants.SF_NAME] + self.op.output_fields
5417 # Never ask for node or type as it's only known to the LU
5418 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5419 while extra in fields:
5420 fields.remove(extra)
5422 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5423 name_idx = field_idx[constants.SF_NAME]
5425 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5426 data = self.rpc.call_storage_list(self.nodes,
5427 self.op.storage_type, st_args,
5428 self.op.name, fields)
5432 for node in utils.NiceSort(self.nodes):
5433 nresult = data[node]
5437 msg = nresult.fail_msg
5439 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5442 rows = dict([(row[name_idx], row) for row in nresult.payload])
5444 for name in utils.NiceSort(rows.keys()):
5449 for field in self.op.output_fields:
5450 if field == constants.SF_NODE:
5452 elif field == constants.SF_TYPE:
5453 val = self.op.storage_type
5454 elif field in field_idx:
5455 val = row[field_idx[field]]
5457 raise errors.ParameterError(field)
5466 class _InstanceQuery(_QueryBase):
5467 FIELDS = query.INSTANCE_FIELDS
5469 def ExpandNames(self, lu):
5470 lu.needed_locks = {}
5471 lu.share_locks = _ShareAll()
5474 self.wanted = _GetWantedInstances(lu, self.names)
5476 self.wanted = locking.ALL_SET
5478 self.do_locking = (self.use_locking and
5479 query.IQ_LIVE in self.requested_data)
5481 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5482 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5483 lu.needed_locks[locking.LEVEL_NODE] = []
5484 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5486 self.do_grouplocks = (self.do_locking and
5487 query.IQ_NODES in self.requested_data)
5489 def DeclareLocks(self, lu, level):
5491 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5492 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5494 # Lock all groups used by instances optimistically; this requires going
5495 # via the node before it's locked, requiring verification later on
5496 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5498 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5499 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5500 elif level == locking.LEVEL_NODE:
5501 lu._LockInstancesNodes() # pylint: disable=W0212
5504 def _CheckGroupLocks(lu):
5505 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5506 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5508 # Check if node groups for locked instances are still correct
5509 for instance_name in owned_instances:
5510 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5512 def _GetQueryData(self, lu):
5513 """Computes the list of instances and their attributes.
5516 if self.do_grouplocks:
5517 self._CheckGroupLocks(lu)
5519 cluster = lu.cfg.GetClusterInfo()
5520 all_info = lu.cfg.GetAllInstancesInfo()
5522 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5524 instance_list = [all_info[name] for name in instance_names]
5525 nodes = frozenset(itertools.chain(*(inst.all_nodes
5526 for inst in instance_list)))
5527 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5530 wrongnode_inst = set()
5532 # Gather data as requested
5533 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5535 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5537 result = node_data[name]
5539 # offline nodes will be in both lists
5540 assert result.fail_msg
5541 offline_nodes.append(name)
5543 bad_nodes.append(name)
5544 elif result.payload:
5545 for inst in result.payload:
5546 if inst in all_info:
5547 if all_info[inst].primary_node == name:
5548 live_data.update(result.payload)
5550 wrongnode_inst.add(inst)
5552 # orphan instance; we don't list it here as we don't
5553 # handle this case yet in the output of instance listing
5554 logging.warning("Orphan instance '%s' found on node %s",
5556 # else no instance is alive
5560 if query.IQ_DISKUSAGE in self.requested_data:
5561 gmi = ganeti.masterd.instance
5562 disk_usage = dict((inst.name,
5563 gmi.ComputeDiskSize(inst.disk_template,
5564 [{constants.IDISK_SIZE: disk.size}
5565 for disk in inst.disks]))
5566 for inst in instance_list)
5570 if query.IQ_CONSOLE in self.requested_data:
5572 for inst in instance_list:
5573 if inst.name in live_data:
5574 # Instance is running
5575 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5577 consinfo[inst.name] = None
5578 assert set(consinfo.keys()) == set(instance_names)
5582 if query.IQ_NODES in self.requested_data:
5583 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5585 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5586 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5587 for uuid in set(map(operator.attrgetter("group"),
5593 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5594 disk_usage, offline_nodes, bad_nodes,
5595 live_data, wrongnode_inst, consinfo,
5599 class LUQuery(NoHooksLU):
5600 """Query for resources/items of a certain kind.
5603 # pylint: disable=W0142
5606 def CheckArguments(self):
5607 qcls = _GetQueryImplementation(self.op.what)
5609 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5611 def ExpandNames(self):
5612 self.impl.ExpandNames(self)
5614 def DeclareLocks(self, level):
5615 self.impl.DeclareLocks(self, level)
5617 def Exec(self, feedback_fn):
5618 return self.impl.NewStyleQuery(self)
5621 class LUQueryFields(NoHooksLU):
5622 """Query for resources/items of a certain kind.
5625 # pylint: disable=W0142
5628 def CheckArguments(self):
5629 self.qcls = _GetQueryImplementation(self.op.what)
5631 def ExpandNames(self):
5632 self.needed_locks = {}
5634 def Exec(self, feedback_fn):
5635 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5638 class LUNodeModifyStorage(NoHooksLU):
5639 """Logical unit for modifying a storage volume on a node.
5644 def CheckArguments(self):
5645 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5647 storage_type = self.op.storage_type
5650 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5652 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5653 " modified" % storage_type,
5656 diff = set(self.op.changes.keys()) - modifiable
5658 raise errors.OpPrereqError("The following fields can not be modified for"
5659 " storage units of type '%s': %r" %
5660 (storage_type, list(diff)),
5663 def ExpandNames(self):
5664 self.needed_locks = {
5665 locking.LEVEL_NODE: self.op.node_name,
5668 def Exec(self, feedback_fn):
5669 """Computes the list of nodes and their attributes.
5672 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5673 result = self.rpc.call_storage_modify(self.op.node_name,
5674 self.op.storage_type, st_args,
5675 self.op.name, self.op.changes)
5676 result.Raise("Failed to modify storage unit '%s' on %s" %
5677 (self.op.name, self.op.node_name))
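# Illustrative opcode usage (hypothetical values): marking an LVM physical
# volume as no longer allocatable could be expressed as
#   opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                               storage_type=constants.ST_LVM_PV,
#                               name="/dev/sda3",
#                               changes={constants.SF_ALLOCATABLE: False})
# Only fields listed in constants.MODIFIABLE_STORAGE_FIELDS for the storage
# type pass the CheckArguments validation above.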
5680 class LUNodeAdd(LogicalUnit):
5681 """Logical unit for adding node to the cluster.
5685 HTYPE = constants.HTYPE_NODE
5686 _NFLAGS = ["master_capable", "vm_capable"]
5688 def CheckArguments(self):
5689 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5690 # validate/normalize the node name
5691 self.hostname = netutils.GetHostname(name=self.op.node_name,
5692 family=self.primary_ip_family)
5693 self.op.node_name = self.hostname.name
5695 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5696 raise errors.OpPrereqError("Cannot readd the master node",
5699 if self.op.readd and self.op.group:
5700 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5701 " being readded", errors.ECODE_INVAL)
5703 def BuildHooksEnv(self):
5706 This will run on all nodes before, and on all nodes + the new node after.
5710 "OP_TARGET": self.op.node_name,
5711 "NODE_NAME": self.op.node_name,
5712 "NODE_PIP": self.op.primary_ip,
5713 "NODE_SIP": self.op.secondary_ip,
5714 "MASTER_CAPABLE": str(self.op.master_capable),
5715 "VM_CAPABLE": str(self.op.vm_capable),
5718 def BuildHooksNodes(self):
5719 """Build hooks nodes.
5722 # Exclude added node
5723 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5724 post_nodes = pre_nodes + [self.op.node_name, ]
5726 return (pre_nodes, post_nodes)
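# Example with hypothetical node names: adding "node3" to a cluster made of
# "node1" and "node2" makes pre_nodes contain only "node1" and "node2" while
# post_nodes additionally contains "node3", so the pre-add hooks skip the
# node being added and the post-add hooks include it.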
5728 def CheckPrereq(self):
5729 """Check prerequisites.
5732 - the new node is not already in the config
5734 - its parameters (single/dual homed) match the cluster
5736 Any errors are signaled by raising errors.OpPrereqError.
5740 hostname = self.hostname
5741 node = hostname.name
5742 primary_ip = self.op.primary_ip = hostname.ip
5743 if self.op.secondary_ip is None:
5744 if self.primary_ip_family == netutils.IP6Address.family:
5745 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5746 " IPv4 address must be given as secondary",
5748 self.op.secondary_ip = primary_ip
5750 secondary_ip = self.op.secondary_ip
5751 if not netutils.IP4Address.IsValid(secondary_ip):
5752 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5753 " address" % secondary_ip, errors.ECODE_INVAL)
5755 node_list = cfg.GetNodeList()
5756 if not self.op.readd and node in node_list:
5757 raise errors.OpPrereqError("Node %s is already in the configuration" %
5758 node, errors.ECODE_EXISTS)
5759 elif self.op.readd and node not in node_list:
5760 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5763 self.changed_primary_ip = False
5765 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5766 if self.op.readd and node == existing_node_name:
5767 if existing_node.secondary_ip != secondary_ip:
5768 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5769 " address configuration as before",
5771 if existing_node.primary_ip != primary_ip:
5772 self.changed_primary_ip = True
5776 if (existing_node.primary_ip == primary_ip or
5777 existing_node.secondary_ip == primary_ip or
5778 existing_node.primary_ip == secondary_ip or
5779 existing_node.secondary_ip == secondary_ip):
5780 raise errors.OpPrereqError("New node ip address(es) conflict with"
5781 " existing node %s" % existing_node.name,
5782 errors.ECODE_NOTUNIQUE)
5784 # After this 'if' block, None is no longer a valid value for the
5785 # _capable op attributes
5787 old_node = self.cfg.GetNodeInfo(node)
5788 assert old_node is not None, "Can't retrieve locked node %s" % node
5789 for attr in self._NFLAGS:
5790 if getattr(self.op, attr) is None:
5791 setattr(self.op, attr, getattr(old_node, attr))
5793 for attr in self._NFLAGS:
5794 if getattr(self.op, attr) is None:
5795 setattr(self.op, attr, True)
5797 if self.op.readd and not self.op.vm_capable:
5798 pri, sec = cfg.GetNodeInstances(node)
5800 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5801 " flag set to false, but it already holds"
5802 " instances" % node,
5805 # check that the type of the node (single versus dual homed) is the
5806 # same as for the master
5807 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5808 master_singlehomed = myself.secondary_ip == myself.primary_ip
5809 newbie_singlehomed = secondary_ip == primary_ip
5810 if master_singlehomed != newbie_singlehomed:
5811 if master_singlehomed:
5812 raise errors.OpPrereqError("The master has no secondary ip but the"
5813 " new node has one",
5816 raise errors.OpPrereqError("The master has a secondary ip but the"
5817 " new node doesn't have one",
5820 # checks reachability
5821 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5822 raise errors.OpPrereqError("Node not reachable by ping",
5823 errors.ECODE_ENVIRON)
5825 if not newbie_singlehomed:
5826 # check reachability from my secondary ip to newbie's secondary ip
5827 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5828 source=myself.secondary_ip):
5829 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5830 " based ping to node daemon port",
5831 errors.ECODE_ENVIRON)
5838 if self.op.master_capable:
5839 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5841 self.master_candidate = False
5844 self.new_node = old_node
5846 node_group = cfg.LookupNodeGroup(self.op.group)
5847 self.new_node = objects.Node(name=node,
5848 primary_ip=primary_ip,
5849 secondary_ip=secondary_ip,
5850 master_candidate=self.master_candidate,
5851 offline=False, drained=False,
5854 if self.op.ndparams:
5855 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5857 if self.op.hv_state:
5858 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5860 if self.op.disk_state:
5861 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5863 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5864 # it a property on the base class.
5865 result = rpc.DnsOnlyRunner().call_version([node])[node]
5866 result.Raise("Can't get version information from node %s" % node)
5867 if constants.PROTOCOL_VERSION == result.payload:
5868 logging.info("Communication to node %s fine, sw version %s match",
5869 node, result.payload)
5871 raise errors.OpPrereqError("Version mismatch master version %s,"
5872 " node version %s" %
5873 (constants.PROTOCOL_VERSION, result.payload),
5874 errors.ECODE_ENVIRON)
5876 def Exec(self, feedback_fn):
5877 """Adds the new node to the cluster.
5880 new_node = self.new_node
5881 node = new_node.name
5883 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5886 # We are adding a new node, so we assume it's powered
5887 new_node.powered = True
5889 # for re-adds, reset the offline/drained/master-candidate flags;
5890 # we need to reset here, otherwise offline would prevent RPC calls
5891 # later in the procedure; this also means that if the re-add
5892 # fails, we are left with a non-offlined, broken node
5894 new_node.drained = new_node.offline = False # pylint: disable=W0201
5895 self.LogInfo("Readding a node, the offline/drained flags were reset")
5896 # if we demote the node, we do cleanup later in the procedure
5897 new_node.master_candidate = self.master_candidate
5898 if self.changed_primary_ip:
5899 new_node.primary_ip = self.op.primary_ip
5901 # copy the master/vm_capable flags
5902 for attr in self._NFLAGS:
5903 setattr(new_node, attr, getattr(self.op, attr))
5905 # notify the user about any possible mc promotion
5906 if new_node.master_candidate:
5907 self.LogInfo("Node will be a master candidate")
5909 if self.op.ndparams:
5910 new_node.ndparams = self.op.ndparams
5912 new_node.ndparams = {}
5914 if self.op.hv_state:
5915 new_node.hv_state_static = self.new_hv_state
5917 if self.op.disk_state:
5918 new_node.disk_state_static = self.new_disk_state
5920 # Add node to our /etc/hosts, and add key to known_hosts
5921 if self.cfg.GetClusterInfo().modify_etc_hosts:
5922 master_node = self.cfg.GetMasterNode()
5923 result = self.rpc.call_etc_hosts_modify(master_node,
5924 constants.ETC_HOSTS_ADD,
5927 result.Raise("Can't update hosts file with new host data")
5929 if new_node.secondary_ip != new_node.primary_ip:
5930 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5933 node_verify_list = [self.cfg.GetMasterNode()]
5934 node_verify_param = {
5935 constants.NV_NODELIST: ([node], {}),
5936 # TODO: do a node-net-test as well?
5939 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5940 self.cfg.GetClusterName())
5941 for verifier in node_verify_list:
5942 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5943 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5945 for failed in nl_payload:
5946 feedback_fn("ssh/hostname verification failed"
5947 " (checking from %s): %s" %
5948 (verifier, nl_payload[failed]))
5949 raise errors.OpExecError("ssh/hostname verification failed")
5952 _RedistributeAncillaryFiles(self)
5953 self.context.ReaddNode(new_node)
5954 # make sure we redistribute the config
5955 self.cfg.Update(new_node, feedback_fn)
5956 # and make sure the new node will not have old files around
5957 if not new_node.master_candidate:
5958 result = self.rpc.call_node_demote_from_mc(new_node.name)
5959 msg = result.fail_msg
5961 self.LogWarning("Node failed to demote itself from master"
5962 " candidate status: %s" % msg)
5964 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5965 additional_vm=self.op.vm_capable)
5966 self.context.AddNode(new_node, self.proc.GetECId())
5969 class LUNodeSetParams(LogicalUnit):
5970 """Modifies the parameters of a node.
5972 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5973 to the node role (as _ROLE_*)
5974 @cvar _R2F: a dictionary from node role to tuples of flags
5975 @cvar _FLAGS: a list of attribute names corresponding to the flags
5978 HPATH = "node-modify"
5979 HTYPE = constants.HTYPE_NODE
5981 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5983 (True, False, False): _ROLE_CANDIDATE,
5984 (False, True, False): _ROLE_DRAINED,
5985 (False, False, True): _ROLE_OFFLINE,
5986 (False, False, False): _ROLE_REGULAR,
5988 _R2F = dict((v, k) for k, v in _F2R.items())
5989 _FLAGS = ["master_candidate", "drained", "offline"]
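# Worked example of the two mappings above: a drained node that is neither a
# master candidate nor offline has the flag tuple (master_candidate, drained,
# offline) == (False, True, False), so
#   _F2R[(False, True, False)] == _ROLE_DRAINED
#   _R2F[_ROLE_DRAINED] == (False, True, False)
# CheckPrereq and Exec use these dictionaries to translate between the
# per-flag view and the single-role view of a node.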
5991 def CheckArguments(self):
5992 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5993 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5994 self.op.master_capable, self.op.vm_capable,
5995 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5997 if all_mods.count(None) == len(all_mods):
5998 raise errors.OpPrereqError("Please pass at least one modification",
6000 if all_mods.count(True) > 1:
6001 raise errors.OpPrereqError("Can't set the node into more than one"
6002 " state at the same time",
6005 # Boolean value that tells us whether we might be demoting from MC
6006 self.might_demote = (self.op.master_candidate is False or
6007 self.op.offline is True or
6008 self.op.drained is True or
6009 self.op.master_capable is False)
6011 if self.op.secondary_ip:
6012 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6013 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6014 " address" % self.op.secondary_ip,
6017 self.lock_all = self.op.auto_promote and self.might_demote
6018 self.lock_instances = self.op.secondary_ip is not None
6020 def _InstanceFilter(self, instance):
6021 """Filter for getting affected instances.
6024 return (instance.disk_template in constants.DTS_INT_MIRROR and
6025 self.op.node_name in instance.all_nodes)
6027 def ExpandNames(self):
6029 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
6031 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
6033 # Since modifying a node can have severe effects on currently running
6034 # operations, the resource lock is at least acquired in shared mode
6035 self.needed_locks[locking.LEVEL_NODE_RES] = \
6036 self.needed_locks[locking.LEVEL_NODE]
6038 # Get node resource and instance locks in shared mode; they are not used
6039 # for anything but read-only access
6040 self.share_locks[locking.LEVEL_NODE_RES] = 1
6041 self.share_locks[locking.LEVEL_INSTANCE] = 1
6043 if self.lock_instances:
6044 self.needed_locks[locking.LEVEL_INSTANCE] = \
6045 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6047 def BuildHooksEnv(self):
6050 This runs on the master node.
6054 "OP_TARGET": self.op.node_name,
6055 "MASTER_CANDIDATE": str(self.op.master_candidate),
6056 "OFFLINE": str(self.op.offline),
6057 "DRAINED": str(self.op.drained),
6058 "MASTER_CAPABLE": str(self.op.master_capable),
6059 "VM_CAPABLE": str(self.op.vm_capable),
6062 def BuildHooksNodes(self):
6063 """Build hooks nodes.
6066 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6069 def CheckPrereq(self):
6070 """Check prerequisites.
6072 This only checks the instance list against the existing names.
6075 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6077 if self.lock_instances:
6078 affected_instances = \
6079 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6081 # Verify instance locks
6082 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6083 wanted_instances = frozenset(affected_instances.keys())
6084 if wanted_instances - owned_instances:
6085 raise errors.OpPrereqError("Instances affected by changing node %s's"
6086 " secondary IP address have changed since"
6087 " locks were acquired, wanted '%s', have"
6088 " '%s'; retry the operation" %
6090 utils.CommaJoin(wanted_instances),
6091 utils.CommaJoin(owned_instances)),
6094 affected_instances = None
6096 if (self.op.master_candidate is not None or
6097 self.op.drained is not None or
6098 self.op.offline is not None):
6099 # we can't change the master's node flags
6100 if self.op.node_name == self.cfg.GetMasterNode():
6101 raise errors.OpPrereqError("The master role can be changed"
6102 " only via master-failover",
6105 if self.op.master_candidate and not node.master_capable:
6106 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6107 " it a master candidate" % node.name,
6110 if self.op.vm_capable is False:
6111 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6113 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6114 " the vm_capable flag" % node.name,
6117 if node.master_candidate and self.might_demote and not self.lock_all:
6118 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6119 # check if after removing the current node, we're missing master
6121 (mc_remaining, mc_should, _) = \
6122 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6123 if mc_remaining < mc_should:
6124 raise errors.OpPrereqError("Not enough master candidates, please"
6125 " pass auto promote option to allow"
6126 " promotion (--auto-promote or RAPI"
6127 " auto_promote=True)", errors.ECODE_STATE)
6129 self.old_flags = old_flags = (node.master_candidate,
6130 node.drained, node.offline)
6131 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6132 self.old_role = old_role = self._F2R[old_flags]
6134 # Check for ineffective changes
6135 for attr in self._FLAGS:
6136 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6137 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6138 setattr(self.op, attr, None)
6140 # Past this point, any flag change to False means a transition
6141 # away from the respective state, as only real changes are kept
6143 # TODO: We might query the real power state if it supports OOB
6144 if _SupportsOob(self.cfg, node):
6145 if self.op.offline is False and not (node.powered or
6146 self.op.powered is True):
6147 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6148 " offline status can be reset") %
6149 self.op.node_name, errors.ECODE_STATE)
6150 elif self.op.powered is not None:
6151 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6152 " as it does not support out-of-band"
6153 " handling") % self.op.node_name,
6156 # If we're being deofflined/drained, we'll MC ourself if needed
6157 if (self.op.drained is False or self.op.offline is False or
6158 (self.op.master_capable and not node.master_capable)):
6159 if _DecideSelfPromotion(self):
6160 self.op.master_candidate = True
6161 self.LogInfo("Auto-promoting node to master candidate")
6163 # If we're no longer master capable, we'll demote ourselves from MC
6164 if self.op.master_capable is False and node.master_candidate:
6165 self.LogInfo("Demoting from master candidate")
6166 self.op.master_candidate = False
6169 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6170 if self.op.master_candidate:
6171 new_role = self._ROLE_CANDIDATE
6172 elif self.op.drained:
6173 new_role = self._ROLE_DRAINED
6174 elif self.op.offline:
6175 new_role = self._ROLE_OFFLINE
6176 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6177 # False is still in new flags, which means we're un-setting (the
6179 new_role = self._ROLE_REGULAR
6180 else: # no new flags, nothing, keep old role
6183 self.new_role = new_role
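# Worked example of the role computation above: draining a node that is
# currently a master candidate means old_flags == (True, False, False), hence
# old_role == _ROLE_CANDIDATE, while self.op.drained == True yields
# new_role == _ROLE_DRAINED; Exec() then asks the node to demote itself from
# master candidate and sets its flags to _R2F[_ROLE_DRAINED].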
6185 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6186 # Trying to transition out of offline status
6187 result = self.rpc.call_version([node.name])[node.name]
6189 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6190 " to report its version: %s" %
6191 (node.name, result.fail_msg),
6194 self.LogWarning("Transitioning node from offline to online state"
6195 " without using re-add. Please make sure the node"
6198 # When changing the secondary ip, verify if this is a single-homed to
6199 # multi-homed transition or vice versa, and apply the relevant
6201 if self.op.secondary_ip:
6202 # Ok even without locking, because this can't be changed by any LU
6203 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6204 master_singlehomed = master.secondary_ip == master.primary_ip
6205 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6206 if self.op.force and node.name == master.name:
6207 self.LogWarning("Transitioning from single-homed to multi-homed"
6208 " cluster; all nodes will require a secondary IP"
6211 raise errors.OpPrereqError("Changing the secondary ip on a"
6212 " single-homed cluster requires the"
6213 " --force option to be passed, and the"
6214 " target node to be the master",
6216 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6217 if self.op.force and node.name == master.name:
6218 self.LogWarning("Transitioning from multi-homed to single-homed"
6219 " cluster; secondary IP addresses will have to be"
6222 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6223 " same as the primary IP on a multi-homed"
6224 " cluster, unless the --force option is"
6225 " passed, and the target node is the"
6226 " master", errors.ECODE_INVAL)
6228 assert not (frozenset(affected_instances) -
6229 self.owned_locks(locking.LEVEL_INSTANCE))
6232 if affected_instances:
6233 msg = ("Cannot change secondary IP address: offline node has"
6234 " instances (%s) configured to use it" %
6235 utils.CommaJoin(affected_instances.keys()))
6236 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6238 # On online nodes, check that no instances are running, and that
6239 # the node has the new ip and we can reach it.
6240 for instance in affected_instances.values():
6241 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6242 msg="cannot change secondary ip")
6244 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6245 if master.name != node.name:
6246 # check reachability from master secondary ip to new secondary ip
6247 if not netutils.TcpPing(self.op.secondary_ip,
6248 constants.DEFAULT_NODED_PORT,
6249 source=master.secondary_ip):
6250 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6251 " based ping to node daemon port",
6252 errors.ECODE_ENVIRON)
6254 if self.op.ndparams:
6255 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6256 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6257 self.new_ndparams = new_ndparams
6259 if self.op.hv_state:
6260 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6261 self.node.hv_state_static)
6263 if self.op.disk_state:
6264 self.new_disk_state = \
6265 _MergeAndVerifyDiskState(self.op.disk_state,
6266 self.node.disk_state_static)
6268 def Exec(self, feedback_fn):
6273 old_role = self.old_role
6274 new_role = self.new_role
6278 if self.op.ndparams:
6279 node.ndparams = self.new_ndparams
6281 if self.op.powered is not None:
6282 node.powered = self.op.powered
6284 if self.op.hv_state:
6285 node.hv_state_static = self.new_hv_state
6287 if self.op.disk_state:
6288 node.disk_state_static = self.new_disk_state
6290 for attr in ["master_capable", "vm_capable"]:
6291 val = getattr(self.op, attr)
6293 setattr(node, attr, val)
6294 result.append((attr, str(val)))
6296 if new_role != old_role:
6297 # Tell the node to demote itself, if no longer MC and not offline
6298 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6299 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6301 self.LogWarning("Node failed to demote itself: %s", msg)
6303 new_flags = self._R2F[new_role]
6304 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6306 result.append((desc, str(nf)))
6307 (node.master_candidate, node.drained, node.offline) = new_flags
6309 # we locked all nodes, so we adjust the candidate pool before updating this node
6311 _AdjustCandidatePool(self, [node.name])
6313 if self.op.secondary_ip:
6314 node.secondary_ip = self.op.secondary_ip
6315 result.append(("secondary_ip", self.op.secondary_ip))
6317 # this will trigger configuration file update, if needed
6318 self.cfg.Update(node, feedback_fn)
6320 # this will trigger job queue propagation or cleanup if the mc
6322 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6323 self.context.ReaddNode(node)
6328 class LUNodePowercycle(NoHooksLU):
6329 """Powercycles a node.
6334 def CheckArguments(self):
6335 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6336 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6337 raise errors.OpPrereqError("The node is the master and the force"
6338 " parameter was not set",
6341 def ExpandNames(self):
6342 """Locking for PowercycleNode.
6344 This is a last-resort option and shouldn't block on other
6345 jobs. Therefore, we grab no locks.
6348 self.needed_locks = {}
6350 def Exec(self, feedback_fn):
6354 result = self.rpc.call_node_powercycle(self.op.node_name,
6355 self.cfg.GetHypervisorType())
6356 result.Raise("Failed to schedule the reboot")
6357 return result.payload
6360 class LUClusterQuery(NoHooksLU):
6361 """Query cluster configuration.
6366 def ExpandNames(self):
6367 self.needed_locks = {}
6369 def Exec(self, feedback_fn):
6370 """Return cluster config.
6373 cluster = self.cfg.GetClusterInfo()
6376 # Filter just for enabled hypervisors
6377 for os_name, hv_dict in cluster.os_hvp.items():
6378 os_hvp[os_name] = {}
6379 for hv_name, hv_params in hv_dict.items():
6380 if hv_name in cluster.enabled_hypervisors:
6381 os_hvp[os_name][hv_name] = hv_params
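# Example with hypothetical OS/hypervisor parameters: if cluster.os_hvp is
#   {"debian-image": {"kvm": {...}, "xen-pvm": {...}}}
# and only "kvm" is listed in cluster.enabled_hypervisors, the filtered
# result is os_hvp == {"debian-image": {"kvm": {...}}}.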
6383 # Convert ip_family to ip_version
6384 primary_ip_version = constants.IP4_VERSION
6385 if cluster.primary_ip_family == netutils.IP6Address.family:
6386 primary_ip_version = constants.IP6_VERSION
6389 "software_version": constants.RELEASE_VERSION,
6390 "protocol_version": constants.PROTOCOL_VERSION,
6391 "config_version": constants.CONFIG_VERSION,
6392 "os_api_version": max(constants.OS_API_VERSIONS),
6393 "export_version": constants.EXPORT_VERSION,
6394 "architecture": runtime.GetArchInfo(),
6395 "name": cluster.cluster_name,
6396 "master": cluster.master_node,
6397 "default_hypervisor": cluster.primary_hypervisor,
6398 "enabled_hypervisors": cluster.enabled_hypervisors,
6399 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6400 for hypervisor_name in cluster.enabled_hypervisors]),
6402 "beparams": cluster.beparams,
6403 "osparams": cluster.osparams,
6404 "ipolicy": cluster.ipolicy,
6405 "nicparams": cluster.nicparams,
6406 "ndparams": cluster.ndparams,
6407 "diskparams": cluster.diskparams,
6408 "candidate_pool_size": cluster.candidate_pool_size,
6409 "master_netdev": cluster.master_netdev,
6410 "master_netmask": cluster.master_netmask,
6411 "use_external_mip_script": cluster.use_external_mip_script,
6412 "volume_group_name": cluster.volume_group_name,
6413 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6414 "file_storage_dir": cluster.file_storage_dir,
6415 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6416 "maintain_node_health": cluster.maintain_node_health,
6417 "ctime": cluster.ctime,
6418 "mtime": cluster.mtime,
6419 "uuid": cluster.uuid,
6420 "tags": list(cluster.GetTags()),
6421 "uid_pool": cluster.uid_pool,
6422 "default_iallocator": cluster.default_iallocator,
6423 "reserved_lvs": cluster.reserved_lvs,
6424 "primary_ip_version": primary_ip_version,
6425 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6426 "hidden_os": cluster.hidden_os,
6427 "blacklisted_os": cluster.blacklisted_os,
6433 class LUClusterConfigQuery(NoHooksLU):
6434 """Return configuration values.
6439 def CheckArguments(self):
6440 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6442 def ExpandNames(self):
6443 self.cq.ExpandNames(self)
6445 def DeclareLocks(self, level):
6446 self.cq.DeclareLocks(self, level)
6448 def Exec(self, feedback_fn):
6449 result = self.cq.OldStyleQuery(self)
6451 assert len(result) == 1
6456 class _ClusterQuery(_QueryBase):
6457 FIELDS = query.CLUSTER_FIELDS
6459 #: Do not sort (there is only one item)
6462 def ExpandNames(self, lu):
6463 lu.needed_locks = {}
6465 # The following variables interact with _QueryBase._GetNames
6466 self.wanted = locking.ALL_SET
6467 self.do_locking = self.use_locking
6470 raise errors.OpPrereqError("Can not use locking for cluster queries",
6473 def DeclareLocks(self, lu, level):
6476 def _GetQueryData(self, lu):
6477 """Computes the list of nodes and their attributes.
6480 # Locking is not used
6481 assert not (compat.any(lu.glm.is_owned(level)
6482 for level in locking.LEVELS
6483 if level != locking.LEVEL_CLUSTER) or
6484 self.do_locking or self.use_locking)
6486 if query.CQ_CONFIG in self.requested_data:
6487 cluster = lu.cfg.GetClusterInfo()
6489 cluster = NotImplemented
6491 if query.CQ_QUEUE_DRAINED in self.requested_data:
6492 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6494 drain_flag = NotImplemented
6496 if query.CQ_WATCHER_PAUSE in self.requested_data:
6497 watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6499 watcher_pause = NotImplemented
6501 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6504 class LUInstanceActivateDisks(NoHooksLU):
6505 """Bring up an instance's disks.
6510 def ExpandNames(self):
6511 self._ExpandAndLockInstance()
6512 self.needed_locks[locking.LEVEL_NODE] = []
6513 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6515 def DeclareLocks(self, level):
6516 if level == locking.LEVEL_NODE:
6517 self._LockInstancesNodes()
6519 def CheckPrereq(self):
6520 """Check prerequisites.
6522 This checks that the instance is in the cluster.
6525 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6526 assert self.instance is not None, \
6527 "Cannot retrieve locked instance %s" % self.op.instance_name
6528 _CheckNodeOnline(self, self.instance.primary_node)
6530 def Exec(self, feedback_fn):
6531 """Activate the disks.
6534 disks_ok, disks_info = \
6535 _AssembleInstanceDisks(self, self.instance,
6536 ignore_size=self.op.ignore_size)
6538 raise errors.OpExecError("Cannot activate block devices")
6540 if self.op.wait_for_sync:
6541 if not _WaitForSync(self, self.instance):
6542 raise errors.OpExecError("Some disks of the instance are degraded!")
6547 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6549 """Prepare the block devices for an instance.
6551 This sets up the block devices on all nodes.
6553 @type lu: L{LogicalUnit}
6554 @param lu: the logical unit on whose behalf we execute
6555 @type instance: L{objects.Instance}
6556 @param instance: the instance for whose disks we assemble
6557 @type disks: list of L{objects.Disk} or None
6558 @param disks: which disks to assemble (or all, if None)
6559 @type ignore_secondaries: boolean
6560 @param ignore_secondaries: if true, errors on secondary nodes
6561 won't result in an error return from the function
6562 @type ignore_size: boolean
6563 @param ignore_size: if true, the current known size of the disk
6564 will not be used during the disk activation, useful for cases
6565 when the size is wrong
6566 @return: False if the operation failed, otherwise a list of
6567 (host, instance_visible_name, node_visible_name)
6568 with the mapping from node devices to instance devices
6573 iname = instance.name
6574 disks = _ExpandCheckDisks(instance, disks)
6576 # With the two-pass mechanism we try to reduce the window of
6577 # opportunity for the race condition of switching DRBD to primary
6578 # before handshaking has occurred, but we do not eliminate it
6580 # The proper fix would be to wait (with some limits) until the
6581 # connection has been made and drbd transitions from WFConnection
6582 # into any other network-connected state (Connected, SyncTarget,
6585 # 1st pass, assemble on all nodes in secondary mode
6586 for idx, inst_disk in enumerate(disks):
6587 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6589 node_disk = node_disk.Copy()
6590 node_disk.UnsetSize()
6591 lu.cfg.SetDiskID(node_disk, node)
6592 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6594 msg = result.fail_msg
6596 is_offline_secondary = (node in instance.secondary_nodes and
6598 lu.LogWarning("Could not prepare block device %s on node %s"
6599 " (is_primary=False, pass=1): %s",
6600 inst_disk.iv_name, node, msg)
6601 if not (ignore_secondaries or is_offline_secondary):
6604 # FIXME: race condition on drbd migration to primary
6606 # 2nd pass, do only the primary node
6607 for idx, inst_disk in enumerate(disks):
6610 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6611 if node != instance.primary_node:
6614 node_disk = node_disk.Copy()
6615 node_disk.UnsetSize()
6616 lu.cfg.SetDiskID(node_disk, node)
6617 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6619 msg = result.fail_msg
6621 lu.LogWarning("Could not prepare block device %s on node %s"
6622 " (is_primary=True, pass=2): %s",
6623 inst_disk.iv_name, node, msg)
6626 dev_path = result.payload
6628 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6630 # leave the disks configured for the primary node
6631 # this is a workaround that would be fixed better by
6632 # improving the logical/physical id handling
6634 lu.cfg.SetDiskID(disk, instance.primary_node)
6636 return disks_ok, device_info
6639 def _StartInstanceDisks(lu, instance, force):
6640 """Start the disks of an instance.
6643 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6644 ignore_secondaries=force)
6646 _ShutdownInstanceDisks(lu, instance)
6647 if force is not None and not force:
6649 hint=("If the message above refers to a secondary node,"
6650 " you can retry the operation using '--force'"))
6651 raise errors.OpExecError("Disk consistency error")
6654 class LUInstanceDeactivateDisks(NoHooksLU):
6655 """Shutdown an instance's disks.
6660 def ExpandNames(self):
6661 self._ExpandAndLockInstance()
6662 self.needed_locks[locking.LEVEL_NODE] = []
6663 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6665 def DeclareLocks(self, level):
6666 if level == locking.LEVEL_NODE:
6667 self._LockInstancesNodes()
6669 def CheckPrereq(self):
6670 """Check prerequisites.
6672 This checks that the instance is in the cluster.
6675 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6676 assert self.instance is not None, \
6677 "Cannot retrieve locked instance %s" % self.op.instance_name
6679 def Exec(self, feedback_fn):
6680 """Deactivate the disks
6683 instance = self.instance
6685 _ShutdownInstanceDisks(self, instance)
6687 _SafeShutdownInstanceDisks(self, instance)
6690 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6691 """Shutdown block devices of an instance.
6693 This function checks whether an instance is running before calling
6694 _ShutdownInstanceDisks.
6697 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6698 _ShutdownInstanceDisks(lu, instance, disks=disks)
6701 def _ExpandCheckDisks(instance, disks):
6702 """Return the instance disks selected by the disks list
6704 @type disks: list of L{objects.Disk} or None
6705 @param disks: selected disks
6706 @rtype: list of L{objects.Disk}
6707 @return: selected instance disks to act on
6711 return instance.disks
6713 if not set(disks).issubset(instance.disks):
6714 raise errors.ProgrammerError("Can only act on disks belonging to the"
6719 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6720 """Shutdown block devices of an instance.
6722 This does the shutdown on all nodes of the instance.
6724 If ignore_primary is false, errors on the primary node are
6729 disks = _ExpandCheckDisks(instance, disks)
6732 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6733 lu.cfg.SetDiskID(top_disk, node)
6734 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6735 msg = result.fail_msg
6737 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6738 disk.iv_name, node, msg)
6739 if ((node == instance.primary_node and not ignore_primary) or
6740 (node != instance.primary_node and not result.offline)):
6745 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6746 """Checks if a node has enough free memory.
6748 This function checks whether a given node has the needed amount of free
6749 memory. In case the node has less memory, or we cannot get the
6750 information from the node, this function raises an OpPrereqError
6753 @type lu: C{LogicalUnit}
6754 @param lu: a logical unit from which we get configuration data
6756 @param node: the node to check
6757 @type reason: C{str}
6758 @param reason: string to use in the error message
6759 @type requested: C{int}
6760 @param requested: the amount of memory in MiB to check for
6761 @type hypervisor_name: C{str}
6762 @param hypervisor_name: the hypervisor to ask for memory stats
6764 @return: node current free memory
6765 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6766 we cannot check the node
6769 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6770 nodeinfo[node].Raise("Can't get data from node %s" % node,
6771 prereq=True, ecode=errors.ECODE_ENVIRON)
6772 (_, _, (hv_info, )) = nodeinfo[node].payload
6774 free_mem = hv_info.get("memory_free", None)
6775 if not isinstance(free_mem, int):
6776 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6777 " was '%s'" % (node, free_mem),
6778 errors.ECODE_ENVIRON)
6779 if requested > free_mem:
6780 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6781 " needed %s MiB, available %s MiB" %
6782 (node, reason, requested, free_mem),
6787 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6788 """Checks if nodes have enough free disk space in the all VGs.
6790 This function check if all given nodes have the needed amount of
6791 free disk. In case any node has less disk or we cannot get the
6792 information from the node, this function raise an OpPrereqError
6795 @type lu: C{LogicalUnit}
6796 @param lu: a logical unit from which we get configuration data
6797 @type nodenames: C{list}
6798 @param nodenames: the list of node names to check
6799 @type req_sizes: C{dict}
6800 @param req_sizes: the hash of vg and corresponding amount of disk in
6802 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6803 or we cannot check the node
6806 for vg, req_size in req_sizes.items():
6807 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
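# Example call with hypothetical volume group names: passing
# req_sizes == {"xenvg": 10240, "ssdvg": 2048} verifies that every node in
# nodenames has at least 10240 MiB free in "xenvg" and 2048 MiB free in
# "ssdvg", issuing one _CheckNodesFreeDiskOnVG check per volume group.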
6810 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6811 """Checks if nodes have enough free disk space in the specified VG.
6813 This function checks whether all given nodes have the needed amount of
6814 free disk. In case any node has less disk, or we cannot get the
6815 information from the node, this function raises an OpPrereqError
6818 @type lu: C{LogicalUnit}
6819 @param lu: a logical unit from which we get configuration data
6820 @type nodenames: C{list}
6821 @param nodenames: the list of node names to check
6823 @param vg: the volume group to check
6824 @type requested: C{int}
6825 @param requested: the amount of disk in MiB to check for
6826 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6827 or we cannot check the node
6830 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6831 for node in nodenames:
6832 info = nodeinfo[node]
6833 info.Raise("Cannot get current information from node %s" % node,
6834 prereq=True, ecode=errors.ECODE_ENVIRON)
6835 (_, (vg_info, ), _) = info.payload
6836 vg_free = vg_info.get("vg_free", None)
6837 if not isinstance(vg_free, int):
6838 raise errors.OpPrereqError("Can't compute free disk space on node"
6839 " %s for vg %s, result was '%s'" %
6840 (node, vg, vg_free), errors.ECODE_ENVIRON)
6841 if requested > vg_free:
6842 raise errors.OpPrereqError("Not enough disk space on target node %s"
6843 " vg %s: required %d MiB, available %d MiB" %
6844 (node, vg, requested, vg_free),
6848 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6849 """Checks if nodes have enough physical CPUs
6851 This function checks if all given nodes have the needed number of
6852 physical CPUs. In case any node has fewer CPUs, or we cannot get the
6853 information from the node, this function raises an OpPrereqError
6856 @type lu: C{LogicalUnit}
6857 @param lu: a logical unit from which we get configuration data
6858 @type nodenames: C{list}
6859 @param nodenames: the list of node names to check
6860 @type requested: C{int}
6861 @param requested: the minimum acceptable number of physical CPUs
6862 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6863 or we cannot check the node
6866 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6867 for node in nodenames:
6868 info = nodeinfo[node]
6869 info.Raise("Cannot get current information from node %s" % node,
6870 prereq=True, ecode=errors.ECODE_ENVIRON)
6871 (_, _, (hv_info, )) = info.payload
6872 num_cpus = hv_info.get("cpu_total", None)
6873 if not isinstance(num_cpus, int):
6874 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6875 " on node %s, result was '%s'" %
6876 (node, num_cpus), errors.ECODE_ENVIRON)
6877 if requested > num_cpus:
6878 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6879 "required" % (node, num_cpus, requested),
6883 class LUInstanceStartup(LogicalUnit):
6884 """Starts an instance.
6887 HPATH = "instance-start"
6888 HTYPE = constants.HTYPE_INSTANCE
6891 def CheckArguments(self):
6893 if self.op.beparams:
6894 # fill the beparams dict
6895 objects.UpgradeBeParams(self.op.beparams)
6896 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6898 def ExpandNames(self):
6899 self._ExpandAndLockInstance()
6900 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6902 def DeclareLocks(self, level):
6903 if level == locking.LEVEL_NODE_RES:
6904 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6906 def BuildHooksEnv(self):
6909 This runs on master, primary and secondary nodes of the instance.
6913 "FORCE": self.op.force,
6916 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6920 def BuildHooksNodes(self):
6921 """Build hooks nodes.
6924 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6927 def CheckPrereq(self):
6928 """Check prerequisites.
6930 This checks that the instance is in the cluster.
6933 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6934 assert self.instance is not None, \
6935 "Cannot retrieve locked instance %s" % self.op.instance_name
6938 if self.op.hvparams:
6939 # check hypervisor parameter syntax (locally)
6940 cluster = self.cfg.GetClusterInfo()
6941 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6942 filled_hvp = cluster.FillHV(instance)
6943 filled_hvp.update(self.op.hvparams)
6944 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6945 hv_type.CheckParameterSyntax(filled_hvp)
6946 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6948 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6950 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6952 if self.primary_offline and self.op.ignore_offline_nodes:
6953 self.LogWarning("Ignoring offline primary node")
6955 if self.op.hvparams or self.op.beparams:
6956 self.LogWarning("Overridden parameters are ignored")
6958 _CheckNodeOnline(self, instance.primary_node)
6960 bep = self.cfg.GetClusterInfo().FillBE(instance)
6961 bep.update(self.op.beparams)
6963 # check bridges existence
6964 _CheckInstanceBridgesExist(self, instance)
6966 remote_info = self.rpc.call_instance_info(instance.primary_node,
6968 instance.hypervisor)
6969 remote_info.Raise("Error checking node %s" % instance.primary_node,
6970 prereq=True, ecode=errors.ECODE_ENVIRON)
6971 if not remote_info.payload: # not running already
6972 _CheckNodeFreeMemory(self, instance.primary_node,
6973 "starting instance %s" % instance.name,
6974 bep[constants.BE_MINMEM], instance.hypervisor)
6976 def Exec(self, feedback_fn):
6977 """Start the instance.
6980 instance = self.instance
6981 force = self.op.force
6983 if not self.op.no_remember:
6984 self.cfg.MarkInstanceUp(instance.name)
6986 if self.primary_offline:
6987 assert self.op.ignore_offline_nodes
6988 self.LogInfo("Primary node offline, marked instance as started")
6990 node_current = instance.primary_node
6992 _StartInstanceDisks(self, instance, force)
6995 self.rpc.call_instance_start(node_current,
6996 (instance, self.op.hvparams,
6998 self.op.startup_paused)
6999 msg = result.fail_msg
7001 _ShutdownInstanceDisks(self, instance)
7002 raise errors.OpExecError("Could not start instance: %s" % msg)
7005 class LUInstanceReboot(LogicalUnit):
7006 """Reboot an instance.
7009 HPATH = "instance-reboot"
7010 HTYPE = constants.HTYPE_INSTANCE
7013 def ExpandNames(self):
7014 self._ExpandAndLockInstance()
7016 def BuildHooksEnv(self):
7019 This runs on master, primary and secondary nodes of the instance.
7023 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7024 "REBOOT_TYPE": self.op.reboot_type,
7025 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7028 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7032 def BuildHooksNodes(self):
7033 """Build hooks nodes.
7036 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7039 def CheckPrereq(self):
7040 """Check prerequisites.
7042 This checks that the instance is in the cluster.
7045 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7046 assert self.instance is not None, \
7047 "Cannot retrieve locked instance %s" % self.op.instance_name
7048 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7049 _CheckNodeOnline(self, instance.primary_node)
7051 # check bridges existence
7052 _CheckInstanceBridgesExist(self, instance)
7054 def Exec(self, feedback_fn):
7055 """Reboot the instance.
7058 instance = self.instance
7059 ignore_secondaries = self.op.ignore_secondaries
7060 reboot_type = self.op.reboot_type
7062 remote_info = self.rpc.call_instance_info(instance.primary_node,
7064 instance.hypervisor)
7065 remote_info.Raise("Error checking node %s" % instance.primary_node)
7066 instance_running = bool(remote_info.payload)
7068 node_current = instance.primary_node
7070 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7071 constants.INSTANCE_REBOOT_HARD]:
7072 for disk in instance.disks:
7073 self.cfg.SetDiskID(disk, node_current)
7074 result = self.rpc.call_instance_reboot(node_current, instance,
7076 self.op.shutdown_timeout)
7077 result.Raise("Could not reboot instance")
7079 if instance_running:
7080 result = self.rpc.call_instance_shutdown(node_current, instance,
7081 self.op.shutdown_timeout)
7082 result.Raise("Could not shutdown instance for full reboot")
7083 _ShutdownInstanceDisks(self, instance)
7085 self.LogInfo("Instance %s was already stopped, starting now",
7087 _StartInstanceDisks(self, instance, ignore_secondaries)
7088 result = self.rpc.call_instance_start(node_current,
7089 (instance, None, None), False)
7090 msg = result.fail_msg
7092 _ShutdownInstanceDisks(self, instance)
7093 raise errors.OpExecError("Could not start instance for"
7094 " full reboot: %s" % msg)
7096 self.cfg.MarkInstanceUp(instance.name)
7099 class LUInstanceShutdown(LogicalUnit):
7100 """Shutdown an instance.
7103 HPATH = "instance-stop"
7104 HTYPE = constants.HTYPE_INSTANCE
7107 def ExpandNames(self):
7108 self._ExpandAndLockInstance()
7110 def BuildHooksEnv(self):
7113 This runs on master, primary and secondary nodes of the instance.
7116 env = _BuildInstanceHookEnvByObject(self, self.instance)
7117 env["TIMEOUT"] = self.op.timeout
7120 def BuildHooksNodes(self):
7121 """Build hooks nodes.
7124 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7127 def CheckPrereq(self):
7128 """Check prerequisites.
7130 This checks that the instance is in the cluster.
7133 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7134 assert self.instance is not None, \
7135 "Cannot retrieve locked instance %s" % self.op.instance_name
7137 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7139 self.primary_offline = \
7140 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7142 if self.primary_offline and self.op.ignore_offline_nodes:
7143 self.LogWarning("Ignoring offline primary node")
7145 _CheckNodeOnline(self, self.instance.primary_node)
7147 def Exec(self, feedback_fn):
7148 """Shutdown the instance.
7151 instance = self.instance
7152 node_current = instance.primary_node
7153 timeout = self.op.timeout
7155 if not self.op.no_remember:
7156 self.cfg.MarkInstanceDown(instance.name)
7158 if self.primary_offline:
7159 assert self.op.ignore_offline_nodes
7160 self.LogInfo("Primary node offline, marked instance as stopped")
7162 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7163 msg = result.fail_msg
7165 self.LogWarning("Could not shutdown instance: %s", msg)
7167 _ShutdownInstanceDisks(self, instance)
7170 class LUInstanceReinstall(LogicalUnit):
7171 """Reinstall an instance.
7174 HPATH = "instance-reinstall"
7175 HTYPE = constants.HTYPE_INSTANCE
7178 def ExpandNames(self):
7179 self._ExpandAndLockInstance()
7181 def BuildHooksEnv(self):
7184 This runs on master, primary and secondary nodes of the instance.
7187 return _BuildInstanceHookEnvByObject(self, self.instance)
7189 def BuildHooksNodes(self):
7190 """Build hooks nodes.
7193 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7196 def CheckPrereq(self):
7197 """Check prerequisites.
7199 This checks that the instance is in the cluster and is not running.
7202 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7203 assert instance is not None, \
7204 "Cannot retrieve locked instance %s" % self.op.instance_name
7205 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7206 " offline, cannot reinstall")
7208 if instance.disk_template == constants.DT_DISKLESS:
7209 raise errors.OpPrereqError("Instance '%s' has no disks" %
7210 self.op.instance_name,
7212 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7214 if self.op.os_type is not None:
7216 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7217 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7218 instance_os = self.op.os_type
7220 instance_os = instance.os
7222 nodelist = list(instance.all_nodes)
7224 if self.op.osparams:
7225 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7226 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7227 self.os_inst = i_osdict # the new dict (without defaults)
7231 self.instance = instance
7233 def Exec(self, feedback_fn):
7234 """Reinstall the instance.
7237 inst = self.instance
7239 if self.op.os_type is not None:
7240 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7241 inst.os = self.op.os_type
7242 # Write to configuration
7243 self.cfg.Update(inst, feedback_fn)
7245 _StartInstanceDisks(self, inst, None)
7247 feedback_fn("Running the instance OS create scripts...")
7248 # FIXME: pass debug option from opcode to backend
7249 result = self.rpc.call_instance_os_add(inst.primary_node,
7250 (inst, self.os_inst), True,
7251 self.op.debug_level)
7252 result.Raise("Could not install OS for instance %s on node %s" %
7253 (inst.name, inst.primary_node))
7255 _ShutdownInstanceDisks(self, inst)
7258 class LUInstanceRecreateDisks(LogicalUnit):
7259 """Recreate an instance's missing disks.
7262 HPATH = "instance-recreate-disks"
7263 HTYPE = constants.HTYPE_INSTANCE
7266 _MODIFYABLE = frozenset([
7267 constants.IDISK_SIZE,
7268 constants.IDISK_MODE,
7271 # New or changed disk parameters may have different semantics
7272 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7273 constants.IDISK_ADOPT,
7275 # TODO: Implement support for changing the VG while recreating
7277 constants.IDISK_METAVG,
7280 def _RunAllocator(self):
7281 """Run the allocator based on input opcode.
7284 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7287 # The allocator should actually run in "relocate" mode, but current
7288 # allocators don't support relocating all the nodes of an instance at
7289 # the same time. As a workaround we use "allocate" mode, but this is
7290 # suboptimal for two reasons:
7291 # - The instance name passed to the allocator is present in the list of
7292 # existing instances, so there could be a conflict within the
7293 # internal structures of the allocator. This doesn't happen with the
7294 # current allocators, but it's a liability.
7295 # - The allocator counts the resources used by the instance twice: once
7296 # because the instance exists already, and once because it tries to
7297 # allocate a new instance.
7298 # The allocator could choose some of the nodes on which the instance is
7299 # running, but that's not a problem. If the instance nodes are broken,
7300 # they should already be marked as drained or offline, and hence
7301 # skipped by the allocator. If instance disks have been lost for other
7302 # reasons, then recreating the disks on the same nodes should be fine.
7303 disk_template = self.instance.disk_template
7304 spindle_use = be_full[constants.BE_SPINDLE_USE]
7305 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7306 disk_template=disk_template,
7307 tags=list(self.instance.GetTags()),
7308 os=self.instance.os,
7310 vcpus=be_full[constants.BE_VCPUS],
7311 memory=be_full[constants.BE_MAXMEM],
7312 spindle_use=spindle_use,
7313 disks=[{constants.IDISK_SIZE: d.size,
7314 constants.IDISK_MODE: d.mode}
7315 for d in self.instance.disks],
7316 hypervisor=self.instance.hypervisor)
7317 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7319 ial.Run(self.op.iallocator)
7321 assert req.RequiredNodes() == len(self.instance.all_nodes)
7324 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7325 " %s" % (self.op.iallocator, ial.info),
7328 self.op.nodes = ial.result
7329 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7330 self.op.instance_name, self.op.iallocator,
7331 utils.CommaJoin(ial.result))
7333 def CheckArguments(self):
7334 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7335 # Normalize and convert deprecated list of disk indices
7336 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
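# Example of the normalization above: the deprecated form
# self.op.disks == [2, 0] becomes [(0, {}), (2, {})], i.e. de-duplicated,
# sorted disk indices, each paired with an (empty) parameter override dict.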
7338 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7340 raise errors.OpPrereqError("Some disks have been specified more than"
7341 " once: %s" % utils.CommaJoin(duplicates),
7344 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7345 # when neither iallocator nor nodes are specified
7346 if self.op.iallocator or self.op.nodes:
7347 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7349 for (idx, params) in self.op.disks:
7350 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7351 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7353 raise errors.OpPrereqError("Parameters for disk %s try to change"
7354 " unmodifyable parameter(s): %s" %
7355 (idx, utils.CommaJoin(unsupported)),
7358 def ExpandNames(self):
7359 self._ExpandAndLockInstance()
7360 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7362 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7363 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7365 self.needed_locks[locking.LEVEL_NODE] = []
7366 if self.op.iallocator:
7367 # iallocator will select a new node in the same group
7368 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7369 self.needed_locks[locking.LEVEL_NODE_RES] = []
7371 def DeclareLocks(self, level):
7372 if level == locking.LEVEL_NODEGROUP:
7373 assert self.op.iallocator is not None
7374 assert not self.op.nodes
7375 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7376 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7377 # Lock the primary group used by the instance optimistically; this
7378 # requires going via the node before it's locked, requiring
7379 # verification later on
7380 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7381 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7383 elif level == locking.LEVEL_NODE:
7384 # If an allocator is used, then we lock all the nodes in the current
7385 # instance group, as we don't know yet which ones will be selected;
7386 # if we replace the nodes without using an allocator, locks are
7387 # already declared in ExpandNames; otherwise, we need to lock all the
7388 # instance nodes for disk re-creation
7389 if self.op.iallocator:
7390 assert not self.op.nodes
7391 assert not self.needed_locks[locking.LEVEL_NODE]
7392 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7394 # Lock member nodes of the group of the primary node
7395 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7396 self.needed_locks[locking.LEVEL_NODE].extend(
7397 self.cfg.GetNodeGroup(group_uuid).members)
7398 elif not self.op.nodes:
7399 self._LockInstancesNodes(primary_only=False)
7400 elif level == locking.LEVEL_NODE_RES:
7402 self.needed_locks[locking.LEVEL_NODE_RES] = \
7403 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7405 def BuildHooksEnv(self):
7408 This runs on master, primary and secondary nodes of the instance.
7411 return _BuildInstanceHookEnvByObject(self, self.instance)
7413 def BuildHooksNodes(self):
7414 """Build hooks nodes.
7417 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7420 def CheckPrereq(self):
7421 """Check prerequisites.
7423 This checks that the instance is in the cluster and is not running.
7426 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7427 assert instance is not None, \
7428 "Cannot retrieve locked instance %s" % self.op.instance_name
7430 if len(self.op.nodes) != len(instance.all_nodes):
7431 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7432 " %d replacement nodes were specified" %
7433 (instance.name, len(instance.all_nodes),
7434 len(self.op.nodes)),
7436 assert instance.disk_template != constants.DT_DRBD8 or \
7437 len(self.op.nodes) == 2
7438 assert instance.disk_template != constants.DT_PLAIN or \
7439 len(self.op.nodes) == 1
7440 primary_node = self.op.nodes[0]
7442 primary_node = instance.primary_node
7443 if not self.op.iallocator:
7444 _CheckNodeOnline(self, primary_node)
7446 if instance.disk_template == constants.DT_DISKLESS:
7447 raise errors.OpPrereqError("Instance '%s' has no disks" %
7448 self.op.instance_name, errors.ECODE_INVAL)
7450 # Verify if node group locks are still correct
7451 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7453 # Node group locks are acquired only for the primary node (and only
7454 # when the allocator is used)
7455 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7458 # if we replace nodes *and* the old primary is offline, we don't
7459 # check the instance state
7460 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7461 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7462 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7463 msg="cannot recreate disks")
7466 self.disks = dict(self.op.disks)
7468 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7470 maxidx = max(self.disks.keys())
7471 if maxidx >= len(instance.disks):
7472 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7475 if ((self.op.nodes or self.op.iallocator) and
7476 sorted(self.disks.keys()) != range(len(instance.disks))):
7477 raise errors.OpPrereqError("Can't recreate disks partially and"
7478 " change the nodes at the same time",
7481 self.instance = instance
7483 if self.op.iallocator:
7484 self._RunAllocator()
7485 # Release unneeded node and node resource locks
7486 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7487 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7489 def Exec(self, feedback_fn):
7490 """Recreate the disks.
7493 instance = self.instance
7495 assert (self.owned_locks(locking.LEVEL_NODE) ==
7496 self.owned_locks(locking.LEVEL_NODE_RES))
7499 mods = [] # keeps track of needed changes
7501 for idx, disk in enumerate(instance.disks):
7503 changes = self.disks[idx]
7505 # Disk should not be recreated
7509 # update secondaries for disks, if needed
7510 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7511 # need to update the nodes and minors
7512 assert len(self.op.nodes) == 2
7513 assert len(disk.logical_id) == 6 # otherwise disk internals
7515 (_, _, old_port, _, _, old_secret) = disk.logical_id
7516 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7517 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7518 new_minors[0], new_minors[1], old_secret)
7519 assert len(disk.logical_id) == len(new_id)
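# Illustrative sketch (hypothetical values): a DRBD8 logical_id is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, secret), so the rebuilt id looks like
#   ("node1.example.com", "node2.example.com", 11000, 0, 1, "a1b2c3d4")
# where only the node names and minors change, while the old port and shared
# secret are carried over.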
7523 mods.append((idx, new_id, changes))
7525 # now that we have passed all asserts above, we can apply the mods
7526 # in a single run (to avoid partial changes)
7527 for idx, new_id, changes in mods:
7528 disk = instance.disks[idx]
7529 if new_id is not None:
7530 assert disk.dev_type == constants.LD_DRBD8
7531 disk.logical_id = new_id
7533 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7534 mode=changes.get(constants.IDISK_MODE, None))
7536 # change primary node, if needed
7538 instance.primary_node = self.op.nodes[0]
7539 self.LogWarning("Changing the instance's nodes, you will have to"
7540 " remove any disks left on the older nodes manually")
7543 self.cfg.Update(instance, feedback_fn)
7545 # All touched nodes must be locked
7546 mylocks = self.owned_locks(locking.LEVEL_NODE)
7547 assert mylocks.issuperset(frozenset(instance.all_nodes))
7548 _CreateDisks(self, instance, to_skip=to_skip)
7551 class LUInstanceRename(LogicalUnit):
7552 """Rename an instance.
7555 HPATH = "instance-rename"
7556 HTYPE = constants.HTYPE_INSTANCE
7558 def CheckArguments(self):
7562 if self.op.ip_check and not self.op.name_check:
7563 # TODO: make the ip check more flexible and not depend on the name check
7564 raise errors.OpPrereqError("IP address check requires a name check",
7567 def BuildHooksEnv(self):
7570 This runs on master, primary and secondary nodes of the instance.
7573 env = _BuildInstanceHookEnvByObject(self, self.instance)
7574 env["INSTANCE_NEW_NAME"] = self.op.new_name
7577 def BuildHooksNodes(self):
7578 """Build hooks nodes.
7581 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7584 def CheckPrereq(self):
7585 """Check prerequisites.
7587 This checks that the instance is in the cluster and is not running.
7590 self.op.instance_name = _ExpandInstanceName(self.cfg,
7591 self.op.instance_name)
7592 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7593 assert instance is not None
7594 _CheckNodeOnline(self, instance.primary_node)
7595 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7596 msg="cannot rename")
7597 self.instance = instance
7599 new_name = self.op.new_name
7600 if self.op.name_check:
7601 hostname = _CheckHostnameSane(self, new_name)
7602 new_name = self.op.new_name = hostname.name
7603 if (self.op.ip_check and
7604 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7605 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7606 (hostname.ip, new_name),
7607 errors.ECODE_NOTUNIQUE)
7609 instance_list = self.cfg.GetInstanceList()
7610 if new_name in instance_list and new_name != instance.name:
7611 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7612 new_name, errors.ECODE_EXISTS)
7614 def Exec(self, feedback_fn):
7615 """Rename the instance.
7618 inst = self.instance
7619 old_name = inst.name
7621 rename_file_storage = False
7622 if (inst.disk_template in constants.DTS_FILEBASED and
7623 self.op.new_name != inst.name):
7624 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7625 rename_file_storage = True
7627 self.cfg.RenameInstance(inst.name, self.op.new_name)
7628 # Change the instance lock. This is definitely safe while we hold the BGL.
7629 # Otherwise the new lock would have to be added in acquired mode.
7631 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7632 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7633 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7635 # re-read the instance from the configuration after rename
7636 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7638 if rename_file_storage:
7639 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7640 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7641 old_file_storage_dir,
7642 new_file_storage_dir)
7643 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7644 " (but the instance has been renamed in Ganeti)" %
7645 (inst.primary_node, old_file_storage_dir,
7646 new_file_storage_dir))
7648 _StartInstanceDisks(self, inst, None)
7649 # update info on disks
7650 info = _GetInstanceInfoText(inst)
7651 for (idx, disk) in enumerate(inst.disks):
7652 for node in inst.all_nodes:
7653 self.cfg.SetDiskID(disk, node)
7654 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7656 self.LogWarning("Error setting info on node %s for disk %s: %s",
7657 node, idx, result.fail_msg)
7659 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7660 old_name, self.op.debug_level)
7661 msg = result.fail_msg
7663 msg = ("Could not run OS rename script for instance %s on node %s"
7664 " (but the instance has been renamed in Ganeti): %s" %
7665 (inst.name, inst.primary_node, msg))
7666 self.LogWarning(msg)
7668 _ShutdownInstanceDisks(self, inst)
7673 class LUInstanceRemove(LogicalUnit):
7674 """Remove an instance.
7677 HPATH = "instance-remove"
7678 HTYPE = constants.HTYPE_INSTANCE
7681 def ExpandNames(self):
7682 self._ExpandAndLockInstance()
7683 self.needed_locks[locking.LEVEL_NODE] = []
7684 self.needed_locks[locking.LEVEL_NODE_RES] = []
7685 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7687 def DeclareLocks(self, level):
7688 if level == locking.LEVEL_NODE:
7689 self._LockInstancesNodes()
7690 elif level == locking.LEVEL_NODE_RES:
7692 self.needed_locks[locking.LEVEL_NODE_RES] = \
7693 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7695 def BuildHooksEnv(self):
7698 This runs on master, primary and secondary nodes of the instance.
7701 env = _BuildInstanceHookEnvByObject(self, self.instance)
7702 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7705 def BuildHooksNodes(self):
7706 """Build hooks nodes.
7709 nl = [self.cfg.GetMasterNode()]
7710 nl_post = list(self.instance.all_nodes) + nl
7711 return (nl, nl_post)
7713 def CheckPrereq(self):
7714 """Check prerequisites.
7716 This checks that the instance is in the cluster.
7719 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7720 assert self.instance is not None, \
7721 "Cannot retrieve locked instance %s" % self.op.instance_name
7723 def Exec(self, feedback_fn):
7724 """Remove the instance.
7727 instance = self.instance
7728 logging.info("Shutting down instance %s on node %s",
7729 instance.name, instance.primary_node)
7731 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7732 self.op.shutdown_timeout)
7733 msg = result.fail_msg
7735 if self.op.ignore_failures:
7736 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7738 raise errors.OpExecError("Could not shutdown instance %s on"
7740 (instance.name, instance.primary_node, msg))
7742 assert (self.owned_locks(locking.LEVEL_NODE) ==
7743 self.owned_locks(locking.LEVEL_NODE_RES))
7744 assert not (set(instance.all_nodes) -
7745 self.owned_locks(locking.LEVEL_NODE)), \
7746 "Not owning correct locks"
7748 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7751 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7752 """Utility function to remove an instance.
7755 logging.info("Removing block devices for instance %s", instance.name)
7757 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7758 if not ignore_failures:
7759 raise errors.OpExecError("Can't remove instance's disks")
7760 feedback_fn("Warning: can't remove instance's disks")
7762 logging.info("Removing instance %s out of cluster config", instance.name)
7764 lu.cfg.RemoveInstance(instance.name)
7766 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7767 "Instance lock removal conflict"
7769 # Remove lock for the instance
7770 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7773 class LUInstanceQuery(NoHooksLU):
7774 """Logical unit for querying instances.
7777 # pylint: disable=W0142
7780 def CheckArguments(self):
7781 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7782 self.op.output_fields, self.op.use_locking)
7784 def ExpandNames(self):
7785 self.iq.ExpandNames(self)
7787 def DeclareLocks(self, level):
7788 self.iq.DeclareLocks(self, level)
7790 def Exec(self, feedback_fn):
7791 return self.iq.OldStyleQuery(self)
7794 def _ExpandNamesForMigration(lu):
7795 """Expands names for use with L{TLMigrateInstance}.
7797 @type lu: L{LogicalUnit}
7800 if lu.op.target_node is not None:
7801 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
7803 lu.needed_locks[locking.LEVEL_NODE] = []
7804 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7806 lu.needed_locks[locking.LEVEL_NODE_RES] = []
7807 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7810 def _DeclareLocksForMigration(lu, level):
7811 """Declares locks for L{TLMigrateInstance}.
7813 @type lu: L{LogicalUnit}
7814 @param level: Lock level
7817 if level == locking.LEVEL_NODE:
7818 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
7819 if instance.disk_template in constants.DTS_EXT_MIRROR:
7820 if lu.op.target_node is None:
7821 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7823 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7825 del lu.recalculate_locks[locking.LEVEL_NODE]
7827 lu._LockInstancesNodes() # pylint: disable=W0212
7828 elif level == locking.LEVEL_NODE_RES:
7830 lu.needed_locks[locking.LEVEL_NODE_RES] = \
7831 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
7834 class LUInstanceFailover(LogicalUnit):
7835 """Failover an instance.
7838 HPATH = "instance-failover"
7839 HTYPE = constants.HTYPE_INSTANCE
7842 def CheckArguments(self):
7843 """Check the arguments.
7846 self.iallocator = getattr(self.op, "iallocator", None)
7847 self.target_node = getattr(self.op, "target_node", None)
7849 def ExpandNames(self):
7850 self._ExpandAndLockInstance()
7851 _ExpandNamesForMigration(self)
7854 TLMigrateInstance(self, self.op.instance_name, False, True, False,
7855 self.op.ignore_consistency, True,
7856 self.op.shutdown_timeout, self.op.ignore_ipolicy)
7858 self.tasklets = [self._migrater]
7860 def DeclareLocks(self, level):
7861 _DeclareLocksForMigration(self, level)
7863 def BuildHooksEnv(self):
7866 This runs on master, primary and secondary nodes of the instance.
7869 instance = self._migrater.instance
7870 source_node = instance.primary_node
7871 target_node = self.op.target_node
7873 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7874 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7875 "OLD_PRIMARY": source_node,
7876 "NEW_PRIMARY": target_node,
7879 if instance.disk_template in constants.DTS_INT_MIRROR:
7880 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7881 env["NEW_SECONDARY"] = source_node
7883 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7885 env.update(_BuildInstanceHookEnvByObject(self, instance))
7889 def BuildHooksNodes(self):
7890 """Build hooks nodes.
7893 instance = self._migrater.instance
7894 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7895 return (nl, nl + [instance.primary_node])
7898 class LUInstanceMigrate(LogicalUnit):
7899 """Migrate an instance.
7901 This is migration without shutting the instance down, as opposed to
7902 failover, which is done with a shutdown.
7905 HPATH = "instance-migrate"
7906 HTYPE = constants.HTYPE_INSTANCE
7909 def ExpandNames(self):
7910 self._ExpandAndLockInstance()
7911 _ExpandNamesForMigration(self)
7914 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
7915 False, self.op.allow_failover, False,
7916 self.op.allow_runtime_changes,
7917 constants.DEFAULT_SHUTDOWN_TIMEOUT,
7918 self.op.ignore_ipolicy)
7920 self.tasklets = [self._migrater]
7922 def DeclareLocks(self, level):
7923 _DeclareLocksForMigration(self, level)
7925 def BuildHooksEnv(self):
7928 This runs on master, primary and secondary nodes of the instance.
7931 instance = self._migrater.instance
7932 source_node = instance.primary_node
7933 target_node = self.op.target_node
7934 env = _BuildInstanceHookEnvByObject(self, instance)
7936 "MIGRATE_LIVE": self._migrater.live,
7937 "MIGRATE_CLEANUP": self.op.cleanup,
7938 "OLD_PRIMARY": source_node,
7939 "NEW_PRIMARY": target_node,
7940 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7943 if instance.disk_template in constants.DTS_INT_MIRROR:
7944 env["OLD_SECONDARY"] = target_node
7945 env["NEW_SECONDARY"] = source_node
7947 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7951 def BuildHooksNodes(self):
7952 """Build hooks nodes.
7955 instance = self._migrater.instance
7956 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7957 return (nl, nl + [instance.primary_node])
7960 class LUInstanceMove(LogicalUnit):
7961 """Move an instance by data-copying.
7964 HPATH = "instance-move"
7965 HTYPE = constants.HTYPE_INSTANCE
7968 def ExpandNames(self):
7969 self._ExpandAndLockInstance()
7970 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7971 self.op.target_node = target_node
7972 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7973 self.needed_locks[locking.LEVEL_NODE_RES] = []
7974 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7976 def DeclareLocks(self, level):
7977 if level == locking.LEVEL_NODE:
7978 self._LockInstancesNodes(primary_only=True)
7979 elif level == locking.LEVEL_NODE_RES:
7981 self.needed_locks[locking.LEVEL_NODE_RES] = \
7982 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7984 def BuildHooksEnv(self):
7987 This runs on master, primary and secondary nodes of the instance.
7991 "TARGET_NODE": self.op.target_node,
7992 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7994 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7997 def BuildHooksNodes(self):
7998 """Build hooks nodes.
8002 self.cfg.GetMasterNode(),
8003 self.instance.primary_node,
8004 self.op.target_node,
8008 def CheckPrereq(self):
8009 """Check prerequisites.
8011 This checks that the instance is in the cluster.
8014 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8015 assert self.instance is not None, \
8016 "Cannot retrieve locked instance %s" % self.op.instance_name
8018 node = self.cfg.GetNodeInfo(self.op.target_node)
8019 assert node is not None, \
8020 "Cannot retrieve locked node %s" % self.op.target_node
8022 self.target_node = target_node = node.name
8024 if target_node == instance.primary_node:
8025 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8026 (instance.name, target_node),
8029 bep = self.cfg.GetClusterInfo().FillBE(instance)
8031 for idx, dsk in enumerate(instance.disks):
8032 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8033 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8034 " cannot copy" % idx, errors.ECODE_STATE)
8036 _CheckNodeOnline(self, target_node)
8037 _CheckNodeNotDrained(self, target_node)
8038 _CheckNodeVmCapable(self, target_node)
8039 cluster = self.cfg.GetClusterInfo()
8040 group_info = self.cfg.GetNodeGroup(node.group)
8041 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8042 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8043 ignore=self.op.ignore_ipolicy)
8045 if instance.admin_state == constants.ADMINST_UP:
8046 # check memory requirements on the secondary node
8047 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8048 instance.name, bep[constants.BE_MAXMEM],
8049 instance.hypervisor)
8051 self.LogInfo("Not checking memory on the secondary node as"
8052 " instance will not be started")
8054 # check bridge existence
8055 _CheckInstanceBridgesExist(self, instance, node=target_node)
8057 def Exec(self, feedback_fn):
8058 """Move an instance.
8060 The move is done by shutting it down on its present node, copying
8061 the data over (slow) and starting it on the new node.
8064 instance = self.instance
8066 source_node = instance.primary_node
8067 target_node = self.target_node
8069 self.LogInfo("Shutting down instance %s on source node %s",
8070 instance.name, source_node)
8072 assert (self.owned_locks(locking.LEVEL_NODE) ==
8073 self.owned_locks(locking.LEVEL_NODE_RES))
8075 result = self.rpc.call_instance_shutdown(source_node, instance,
8076 self.op.shutdown_timeout)
8077 msg = result.fail_msg
8079 if self.op.ignore_consistency:
8080 self.LogWarning("Could not shutdown instance %s on node %s."
8081 " Proceeding anyway. Please make sure node"
8082 " %s is down. Error details: %s",
8083 instance.name, source_node, source_node, msg)
8085 raise errors.OpExecError("Could not shutdown instance %s on"
8087 (instance.name, source_node, msg))
8089 # create the target disks
8091 _CreateDisks(self, instance, target_node=target_node)
8092 except errors.OpExecError:
8093 self.LogWarning("Device creation failed, reverting...")
8095 _RemoveDisks(self, instance, target_node=target_node)
8097 self.cfg.ReleaseDRBDMinors(instance.name)
8100 cluster_name = self.cfg.GetClusterInfo().cluster_name
8103 # activate, get path, copy the data over
8104 for idx, disk in enumerate(instance.disks):
8105 self.LogInfo("Copying data for disk %d", idx)
8106 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8107 instance.name, True, idx)
8109 self.LogWarning("Can't assemble newly created disk %d: %s",
8110 idx, result.fail_msg)
8111 errs.append(result.fail_msg)
8113 dev_path = result.payload
8114 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8115 target_node, dev_path,
8118 self.LogWarning("Can't copy data over for disk %d: %s",
8119 idx, result.fail_msg)
8120 errs.append(result.fail_msg)
8124 self.LogWarning("Some disks failed to copy, aborting")
8126 _RemoveDisks(self, instance, target_node=target_node)
8128 self.cfg.ReleaseDRBDMinors(instance.name)
8129 raise errors.OpExecError("Errors during disk copy: %s" %
8132 instance.primary_node = target_node
8133 self.cfg.Update(instance, feedback_fn)
8135 self.LogInfo("Removing the disks on the original node")
8136 _RemoveDisks(self, instance, target_node=source_node)
8138 # Only start the instance if it's marked as up
8139 if instance.admin_state == constants.ADMINST_UP:
8140 self.LogInfo("Starting instance %s on node %s",
8141 instance.name, target_node)
8143 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8144 ignore_secondaries=True)
8146 _ShutdownInstanceDisks(self, instance)
8147 raise errors.OpExecError("Can't activate the instance's disks")
8149 result = self.rpc.call_instance_start(target_node,
8150 (instance, None, None), False)
8151 msg = result.fail_msg
8153 _ShutdownInstanceDisks(self, instance)
8154 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8155 (instance.name, target_node, msg))
8158 class LUNodeMigrate(LogicalUnit):
8159 """Migrate all instances from a node.
8162 HPATH = "node-migrate"
8163 HTYPE = constants.HTYPE_NODE
8166 def CheckArguments(self):
8169 def ExpandNames(self):
8170 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8172 self.share_locks = _ShareAll()
8173 self.needed_locks = {
8174 locking.LEVEL_NODE: [self.op.node_name],
8177 def BuildHooksEnv(self):
8180 This runs on the master, the primary and all the secondaries.
8184 "NODE_NAME": self.op.node_name,
8185 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8188 def BuildHooksNodes(self):
8189 """Build hooks nodes.
8192 nl = [self.cfg.GetMasterNode()]
8195 def CheckPrereq(self):
8198 def Exec(self, feedback_fn):
8199 # Prepare jobs for migration instances
8200 allow_runtime_changes = self.op.allow_runtime_changes
8202 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8205 iallocator=self.op.iallocator,
8206 target_node=self.op.target_node,
8207 allow_runtime_changes=allow_runtime_changes,
8208 ignore_ipolicy=self.op.ignore_ipolicy)]
8209 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8211 # TODO: Run iallocator in this opcode and pass correct placement options to
8212 # OpInstanceMigrate. Since other jobs can modify the cluster between
8213 # running the iallocator and the actual migration, a good consistency model
8214 # will have to be found.
8216 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8217 frozenset([self.op.node_name]))
8219 return ResultWithJobs(jobs)
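# Illustrative sketch (hypothetical instance names): "jobs" is a list of
# single-opcode jobs, one per primary instance on the node being evacuated,
# e.g.
#   [[opcodes.OpInstanceMigrate(instance_name="web1", ...)],
#    [opcodes.OpInstanceMigrate(instance_name="db1", ...)]]
# so each migration is submitted and tracked as its own job.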
8222 class TLMigrateInstance(Tasklet):
8223 """Tasklet class for instance migration.
8226 @ivar live: whether the migration will be done live or non-live;
8227 this variable is initialized only after CheckPrereq has run
8228 @type cleanup: boolean
8229 @ivar cleanup: Whether we clean up after a failed migration
8230 @type iallocator: string
8231 @ivar iallocator: The iallocator used to determine target_node
8232 @type target_node: string
8233 @ivar target_node: If given, the target_node to reallocate the instance to
8234 @type failover: boolean
8235 @ivar failover: Whether operation results in failover or migration
8236 @type fallback: boolean
8237 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
8239 @type ignore_consistency: boolean
8240 @ivar ignore_consistency: Whether we should ignore consistency between the source and target nodes
8242 @type shutdown_timeout: int
8243 @ivar shutdown_timeout: timeout of the instance shutdown in case of failover
8244 @type ignore_ipolicy: bool
8245 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8250 _MIGRATION_POLL_INTERVAL = 1 # seconds
8251 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8253 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8254 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8256 """Initializes this class.
8259 Tasklet.__init__(self, lu)
8262 self.instance_name = instance_name
8263 self.cleanup = cleanup
8264 self.live = False # will be overridden later
8265 self.failover = failover
8266 self.fallback = fallback
8267 self.ignore_consistency = ignore_consistency
8268 self.shutdown_timeout = shutdown_timeout
8269 self.ignore_ipolicy = ignore_ipolicy
8270 self.allow_runtime_changes = allow_runtime_changes
8272 def CheckPrereq(self):
8273 """Check prerequisites.
8275 This checks that the instance is in the cluster.
8278 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8279 instance = self.cfg.GetInstanceInfo(instance_name)
8280 assert instance is not None
8281 self.instance = instance
8282 cluster = self.cfg.GetClusterInfo()
8284 if (not self.cleanup and
8285 not instance.admin_state == constants.ADMINST_UP and
8286 not self.failover and self.fallback):
8287 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8288 " switching to failover")
8289 self.failover = True
8291 if instance.disk_template not in constants.DTS_MIRRORED:
8296 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8297 " %s" % (instance.disk_template, text),
8300 if instance.disk_template in constants.DTS_EXT_MIRROR:
8301 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8303 if self.lu.op.iallocator:
8304 self._RunAllocator()
8306 # We set self.target_node here, as it is required by
8308 self.target_node = self.lu.op.target_node
8310 # Check that the target node is correct in terms of instance policy
8311 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8312 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8313 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8315 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8316 ignore=self.ignore_ipolicy)
8318 # self.target_node is already populated, either directly or by the iallocator
8320 target_node = self.target_node
8321 if self.target_node == instance.primary_node:
8322 raise errors.OpPrereqError("Cannot migrate instance %s"
8323 " to its primary (%s)" %
8324 (instance.name, instance.primary_node),
8327 if len(self.lu.tasklets) == 1:
8328 # It is safe to release locks only when we're the only tasklet
8330 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8331 keep=[instance.primary_node, self.target_node])
8334 secondary_nodes = instance.secondary_nodes
8335 if not secondary_nodes:
8336 raise errors.ConfigurationError("No secondary node but using"
8337 " %s disk template" %
8338 instance.disk_template)
8339 target_node = secondary_nodes[0]
8340 if self.lu.op.iallocator or (self.lu.op.target_node and
8341 self.lu.op.target_node != target_node):
8343 text = "failed over"
8346 raise errors.OpPrereqError("Instances with disk template %s cannot"
8347 " be %s to arbitrary nodes"
8348 " (neither an iallocator nor a target"
8349 " node can be passed)" %
8350 (instance.disk_template, text),
8352 nodeinfo = self.cfg.GetNodeInfo(target_node)
8353 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8354 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8356 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8357 ignore=self.ignore_ipolicy)
8359 i_be = cluster.FillBE(instance)
8361 # check memory requirements on the secondary node
8362 if (not self.cleanup and
8363 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8364 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8365 "migrating instance %s" %
8367 i_be[constants.BE_MINMEM],
8368 instance.hypervisor)
8370 self.lu.LogInfo("Not checking memory on the secondary node as"
8371 " instance will not be started")
8373 # check if failover must be forced instead of migration
8374 if (not self.cleanup and not self.failover and
8375 i_be[constants.BE_ALWAYS_FAILOVER]):
8376 self.lu.LogInfo("Instance configured to always failover; fallback"
8378 self.failover = True
8380 # check bridge existence
8381 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8383 if not self.cleanup:
8384 _CheckNodeNotDrained(self.lu, target_node)
8385 if not self.failover:
8386 result = self.rpc.call_instance_migratable(instance.primary_node,
8388 if result.fail_msg and self.fallback:
8389 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8391 self.failover = True
8393 result.Raise("Can't migrate, please use failover",
8394 prereq=True, ecode=errors.ECODE_STATE)
8396 assert not (self.failover and self.cleanup)
8398 if not self.failover:
8399 if self.lu.op.live is not None and self.lu.op.mode is not None:
8400 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8401 " parameters are accepted",
8403 if self.lu.op.live is not None:
8405 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8407 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8408 # reset the 'live' parameter to None so that repeated
8409 # invocations of CheckPrereq do not raise an exception
8410 self.lu.op.live = None
8411 elif self.lu.op.mode is None:
8412 # read the default value from the hypervisor
8413 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8414 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8416 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
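# Summary of the mode resolution above: live=True selects HT_MIGRATION_LIVE,
# live=False selects HT_MIGRATION_NONLIVE, and if neither 'live' nor 'mode'
# was given, the hypervisor's HV_MIGRATION_MODE default decides.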
8418 # Failover is never live
8421 if not (self.failover or self.cleanup):
8422 remote_info = self.rpc.call_instance_info(instance.primary_node,
8424 instance.hypervisor)
8425 remote_info.Raise("Error checking instance on node %s" %
8426 instance.primary_node)
8427 instance_running = bool(remote_info.payload)
8428 if instance_running:
8429 self.current_mem = int(remote_info.payload["memory"])
8431 def _RunAllocator(self):
8432 """Run the allocator based on input opcode.
8435 # FIXME: add a self.ignore_ipolicy option
8436 req = iallocator.IAReqRelocate(name=self.instance_name,
8437 relocate_from=[self.instance.primary_node])
8438 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8440 ial.Run(self.lu.op.iallocator)
8443 raise errors.OpPrereqError("Can't compute nodes using"
8444 " iallocator '%s': %s" %
8445 (self.lu.op.iallocator, ial.info),
8447 self.target_node = ial.result[0]
8448 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8449 self.instance_name, self.lu.op.iallocator,
8450 utils.CommaJoin(ial.result))
8452 def _WaitUntilSync(self):
8453 """Poll with custom rpc for disk sync.
8455 This uses our own step-based rpc call.
8458 self.feedback_fn("* wait until resync is done")
8462 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8464 (self.instance.disks,
8467 for node, nres in result.items():
8468 nres.Raise("Cannot resync disks on node %s" % node)
8469 node_done, node_percent = nres.payload
8470 all_done = all_done and node_done
8471 if node_percent is not None:
8472 min_percent = min(min_percent, node_percent)
8474 if min_percent < 100:
8475 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8478 def _EnsureSecondary(self, node):
8479 """Demote a node to secondary.
8482 self.feedback_fn("* switching node %s to secondary mode" % node)
8484 for dev in self.instance.disks:
8485 self.cfg.SetDiskID(dev, node)
8487 result = self.rpc.call_blockdev_close(node, self.instance.name,
8488 self.instance.disks)
8489 result.Raise("Cannot change disk to secondary on node %s" % node)
8491 def _GoStandalone(self):
8492 """Disconnect from the network.
8495 self.feedback_fn("* changing into standalone mode")
8496 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8497 self.instance.disks)
8498 for node, nres in result.items():
8499 nres.Raise("Cannot disconnect disks node %s" % node)
8501 def _GoReconnect(self, multimaster):
8502 """Reconnect to the network.
8508 msg = "single-master"
8509 self.feedback_fn("* changing disks into %s mode" % msg)
8510 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8511 (self.instance.disks, self.instance),
8512 self.instance.name, multimaster)
8513 for node, nres in result.items():
8514 nres.Raise("Cannot change disks config on node %s" % node)
8516 def _ExecCleanup(self):
8517 """Try to cleanup after a failed migration.
8519 The cleanup is done by:
8520 - check that the instance is running only on one node
8521 (and update the config if needed)
8522 - change disks on its secondary node to secondary
8523 - wait until disks are fully synchronized
8524 - disconnect from the network
8525 - change disks into single-master mode
8526 - wait again until disks are fully synchronized
8529 instance = self.instance
8530 target_node = self.target_node
8531 source_node = self.source_node
8533 # check running on only one node
8534 self.feedback_fn("* checking where the instance actually runs"
8535 " (if this hangs, the hypervisor might be in"
8537 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8538 for node, result in ins_l.items():
8539 result.Raise("Can't contact node %s" % node)
8541 runningon_source = instance.name in ins_l[source_node].payload
8542 runningon_target = instance.name in ins_l[target_node].payload
8544 if runningon_source and runningon_target:
8545 raise errors.OpExecError("Instance seems to be running on two nodes,"
8546 " or the hypervisor is confused; you will have"
8547 " to ensure manually that it runs only on one"
8548 " and restart this operation")
8550 if not (runningon_source or runningon_target):
8551 raise errors.OpExecError("Instance does not seem to be running at all;"
8552 " in this case it's safer to repair by"
8553 " running 'gnt-instance stop' to ensure disk"
8554 " shutdown, and then restarting it")
8556 if runningon_target:
8557 # the migration has actually succeeded, we need to update the config
8558 self.feedback_fn("* instance running on secondary node (%s),"
8559 " updating config" % target_node)
8560 instance.primary_node = target_node
8561 self.cfg.Update(instance, self.feedback_fn)
8562 demoted_node = source_node
8564 self.feedback_fn("* instance confirmed to be running on its"
8565 " primary node (%s)" % source_node)
8566 demoted_node = target_node
8568 if instance.disk_template in constants.DTS_INT_MIRROR:
8569 self._EnsureSecondary(demoted_node)
8571 self._WaitUntilSync()
8572 except errors.OpExecError:
8573 # we ignore errors here, since if the device is standalone, it
8574 # won't be able to sync
8576 self._GoStandalone()
8577 self._GoReconnect(False)
8578 self._WaitUntilSync()
8580 self.feedback_fn("* done")
8582 def _RevertDiskStatus(self):
8583 """Try to revert the disk status after a failed migration.
8586 target_node = self.target_node
8587 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8591 self._EnsureSecondary(target_node)
8592 self._GoStandalone()
8593 self._GoReconnect(False)
8594 self._WaitUntilSync()
8595 except errors.OpExecError, err:
8596 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8597 " please try to recover the instance manually;"
8598 " error '%s'" % str(err))
8600 def _AbortMigration(self):
8601 """Call the hypervisor code to abort a started migration.
8604 instance = self.instance
8605 target_node = self.target_node
8606 source_node = self.source_node
8607 migration_info = self.migration_info
8609 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8613 abort_msg = abort_result.fail_msg
8615 logging.error("Aborting migration failed on target node %s: %s",
8616 target_node, abort_msg)
8617 # Don't raise an exception here, as we still have to try to revert the
8618 # disk status, even if this step failed.
8620 abort_result = self.rpc.call_instance_finalize_migration_src(
8621 source_node, instance, False, self.live)
8622 abort_msg = abort_result.fail_msg
8624 logging.error("Aborting migration failed on source node %s: %s",
8625 source_node, abort_msg)
8627 def _ExecMigration(self):
8628 """Migrate an instance.
8630 The migration is done by:
8631 - change the disks into dual-master mode
8632 - wait until disks are fully synchronized again
8633 - migrate the instance
8634 - change disks on the new secondary node (the old primary) to secondary
8635 - wait until disks are fully synchronized
8636 - change disks into single-master mode
8639 instance = self.instance
8640 target_node = self.target_node
8641 source_node = self.source_node
8643 # Check for hypervisor version mismatch and warn the user.
8644 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8645 None, [self.instance.hypervisor])
8646 for ninfo in nodeinfo.values():
8647 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8649 (_, _, (src_info, )) = nodeinfo[source_node].payload
8650 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8652 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8653 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8654 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8655 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8656 if src_version != dst_version:
8657 self.feedback_fn("* warning: hypervisor version mismatch between"
8658 " source (%s) and target (%s) node" %
8659 (src_version, dst_version))
8661 self.feedback_fn("* checking disk consistency between source and target")
8662 for (idx, dev) in enumerate(instance.disks):
8663 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8664 raise errors.OpExecError("Disk %s is degraded or not fully"
8665 " synchronized on target node,"
8666 " aborting migration" % idx)
8668 if self.current_mem > self.tgt_free_mem:
8669 if not self.allow_runtime_changes:
8670 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8671 " free memory to fit instance %s on target"
8672 " node %s (have %dMB, need %dMB)" %
8673 (instance.name, target_node,
8674 self.tgt_free_mem, self.current_mem))
8675 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8676 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8679 rpcres.Raise("Cannot modify instance runtime memory")
8681 # First get the migration information from the remote node
8682 result = self.rpc.call_migration_info(source_node, instance)
8683 msg = result.fail_msg
8685 log_err = ("Failed fetching source migration information from %s: %s" %
8687 logging.error(log_err)
8688 raise errors.OpExecError(log_err)
8690 self.migration_info = migration_info = result.payload
8692 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8693 # Then switch the disks to master/master mode
8694 self._EnsureSecondary(target_node)
8695 self._GoStandalone()
8696 self._GoReconnect(True)
8697 self._WaitUntilSync()
8699 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8700 result = self.rpc.call_accept_instance(target_node,
8703 self.nodes_ip[target_node])
8705 msg = result.fail_msg
8707 logging.error("Instance pre-migration failed, trying to revert"
8708 " disk status: %s", msg)
8709 self.feedback_fn("Pre-migration failed, aborting")
8710 self._AbortMigration()
8711 self._RevertDiskStatus()
8712 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8713 (instance.name, msg))
8715 self.feedback_fn("* migrating instance to %s" % target_node)
8716 result = self.rpc.call_instance_migrate(source_node, instance,
8717 self.nodes_ip[target_node],
8719 msg = result.fail_msg
8721 logging.error("Instance migration failed, trying to revert"
8722 " disk status: %s", msg)
8723 self.feedback_fn("Migration failed, aborting")
8724 self._AbortMigration()
8725 self._RevertDiskStatus()
8726 raise errors.OpExecError("Could not migrate instance %s: %s" %
8727 (instance.name, msg))
8729 self.feedback_fn("* starting memory transfer")
8730 last_feedback = time.time()
8732 result = self.rpc.call_instance_get_migration_status(source_node,
8734 msg = result.fail_msg
8735 ms = result.payload # MigrationStatus instance
8736 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8737 logging.error("Instance migration failed, trying to revert"
8738 " disk status: %s", msg)
8739 self.feedback_fn("Migration failed, aborting")
8740 self._AbortMigration()
8741 self._RevertDiskStatus()
8743 msg = "hypervisor returned failure"
8744 raise errors.OpExecError("Could not migrate instance %s: %s" %
8745 (instance.name, msg))
8747 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8748 self.feedback_fn("* memory transfer complete")
8751 if (utils.TimeoutExpired(last_feedback,
8752 self._MIGRATION_FEEDBACK_INTERVAL) and
8753 ms.transferred_ram is not None):
8754 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8755 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8756 last_feedback = time.time()
8758 time.sleep(self._MIGRATION_POLL_INTERVAL)
8760 result = self.rpc.call_instance_finalize_migration_src(source_node,
8764 msg = result.fail_msg
8766 logging.error("Instance migration succeeded, but finalization failed"
8767 " on the source node: %s", msg)
8768 raise errors.OpExecError("Could not finalize instance migration: %s" %
8771 instance.primary_node = target_node
8773 # distribute new instance config to the other nodes
8774 self.cfg.Update(instance, self.feedback_fn)
8776 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8780 msg = result.fail_msg
8782 logging.error("Instance migration succeeded, but finalization failed"
8783 " on the target node: %s", msg)
8784 raise errors.OpExecError("Could not finalize instance migration: %s" %
8787 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8788 self._EnsureSecondary(source_node)
8789 self._WaitUntilSync()
8790 self._GoStandalone()
8791 self._GoReconnect(False)
8792 self._WaitUntilSync()
8794 # If the instance's disk template is `rbd' and there was a successful
8795 # migration, unmap the device from the source node.
8796 if self.instance.disk_template == constants.DT_RBD:
8797 disks = _ExpandCheckDisks(instance, instance.disks)
8798 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8800 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8801 msg = result.fail_msg
8803 logging.error("Migration was successful, but couldn't unmap the"
8804 " block device %s on source node %s: %s",
8805 disk.iv_name, source_node, msg)
8806 logging.error("You need to unmap the device %s manually on %s",
8807 disk.iv_name, source_node)
8809 self.feedback_fn("* done")
8811 def _ExecFailover(self):
8812 """Failover an instance.
8814 The failover is done by shutting it down on its present node and
8815 starting it on the secondary.
8818 instance = self.instance
8819 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8821 source_node = instance.primary_node
8822 target_node = self.target_node
8824 if instance.admin_state == constants.ADMINST_UP:
8825 self.feedback_fn("* checking disk consistency between source and target")
8826 for (idx, dev) in enumerate(instance.disks):
8827 # for drbd, these are drbd over lvm
8828 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8830 if primary_node.offline:
8831 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8833 (primary_node.name, idx, target_node))
8834 elif not self.ignore_consistency:
8835 raise errors.OpExecError("Disk %s is degraded on target node,"
8836 " aborting failover" % idx)
8838 self.feedback_fn("* not checking disk consistency as instance is not"
8841 self.feedback_fn("* shutting down instance on source node")
8842 logging.info("Shutting down instance %s on node %s",
8843 instance.name, source_node)
8845 result = self.rpc.call_instance_shutdown(source_node, instance,
8846 self.shutdown_timeout)
8847 msg = result.fail_msg
8849 if self.ignore_consistency or primary_node.offline:
8850 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8851 " proceeding anyway; please make sure node"
8852 " %s is down; error details: %s",
8853 instance.name, source_node, source_node, msg)
8855 raise errors.OpExecError("Could not shutdown instance %s on"
8857 (instance.name, source_node, msg))
8859 self.feedback_fn("* deactivating the instance's disks on source node")
8860 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8861 raise errors.OpExecError("Can't shut down the instance's disks")
8863 instance.primary_node = target_node
8864 # distribute new instance config to the other nodes
8865 self.cfg.Update(instance, self.feedback_fn)
8867 # Only start the instance if it's marked as up
8868 if instance.admin_state == constants.ADMINST_UP:
8869 self.feedback_fn("* activating the instance's disks on target node %s" %
8871 logging.info("Starting instance %s on node %s",
8872 instance.name, target_node)
8874 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8875 ignore_secondaries=True)
8877 _ShutdownInstanceDisks(self.lu, instance)
8878 raise errors.OpExecError("Can't activate the instance's disks")
8880 self.feedback_fn("* starting the instance on the target node %s" %
8882 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8884 msg = result.fail_msg
8886 _ShutdownInstanceDisks(self.lu, instance)
8887 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8888 (instance.name, target_node, msg))
8890 def Exec(self, feedback_fn):
8891 """Perform the migration.
8894 self.feedback_fn = feedback_fn
8895 self.source_node = self.instance.primary_node
8897 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8898 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8899 self.target_node = self.instance.secondary_nodes[0]
8900 # Otherwise self.target_node has been populated either
8901 # directly, or through an iallocator.
8903 self.all_nodes = [self.source_node, self.target_node]
8904 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8905 in self.cfg.GetMultiNodeInfo(self.all_nodes))
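# Illustrative sketch (hypothetical addresses): nodes_ip maps node names to
# their secondary (replication network) IPs, e.g.
#   {"node1.example.com": "192.0.2.10", "node2.example.com": "192.0.2.11"}
# and is what the DRBD network RPCs (_GoStandalone/_GoReconnect) use to
# (re)wire the disks.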
8908 feedback_fn("Failover instance %s" % self.instance.name)
8909 self._ExecFailover()
8911 feedback_fn("Migrating instance %s" % self.instance.name)
8914 return self._ExecCleanup()
8916 return self._ExecMigration()
8919 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8921 """Wrapper around L{_CreateBlockDevInner}.
8923 This method annotates the root device first.
8926 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8927 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8931 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8933 """Create a tree of block devices on a given node.
8935 If this device type has to be created on secondaries, create it and
8938 If not, just recurse to children keeping the same 'force' value.
8940 @attention: The device has to be annotated already.
8942 @param lu: the lu on whose behalf we execute
8943 @param node: the node on which to create the device
8944 @type instance: L{objects.Instance}
8945 @param instance: the instance which owns the device
8946 @type device: L{objects.Disk}
8947 @param device: the device to create
8948 @type force_create: boolean
8949 @param force_create: whether to force creation of this device; this
8950 will be changed to True whenever we find a device which has the
8951 CreateOnSecondary() attribute
8952 @param info: the extra 'metadata' we should attach to the device
8953 (this will be represented as an LVM tag)
8954 @type force_open: boolean
8955 @param force_open: this parameter will be passed to the
8956 L{backend.BlockdevCreate} function where it specifies
8957 whether we run on primary or not, and it affects both
8958 the child assembly and the device's own Open() execution
8961 if device.CreateOnSecondary():
8965 for child in device.children:
8966 _CreateBlockDevInner(lu, node, instance, child, force_create,
8969 if not force_create:
8972 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8975 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8976 """Create a single block device on a given node.
8978 This will not recurse over children of the device, so they must be created in advance.
8981 @param lu: the lu on whose behalf we execute
8982 @param node: the node on which to create the device
8983 @type instance: L{objects.Instance}
8984 @param instance: the instance which owns the device
8985 @type device: L{objects.Disk}
8986 @param device: the device to create
8987 @param info: the extra 'metadata' we should attach to the device
8988 (this will be represented as an LVM tag)
8989 @type force_open: boolean
8990 @param force_open: this parameter will be passed to the
8991 L{backend.BlockdevCreate} function where it specifies
8992 whether we run on primary or not, and it affects both
8993 the child assembly and the device's own Open() execution
8996 lu.cfg.SetDiskID(device, node)
8997 result = lu.rpc.call_blockdev_create(node, device, device.size,
8998 instance.name, force_open, info)
8999 result.Raise("Can't create block device %s on"
9000 " node %s for instance %s" % (device, node, instance.name))
9001 if device.physical_id is None:
9002 device.physical_id = result.payload
9005 def _GenerateUniqueNames(lu, exts):
9006 """Generate a suitable LV name.
9008 This will generate a logical volume name for the given instance.
9013 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9014 results.append("%s%s" % (new_id, val))
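# Illustrative example (hypothetical UUIDs): for exts=[".disk0", ".disk1"] this
# produces names such as
#   ["0b6e...91.disk0", "f3a2...7c.disk1"]
# i.e. one cluster-wide unique ID per requested extension.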
9018 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9019 iv_name, p_minor, s_minor):
9020 """Generate a drbd8 device complete with its children.
9023 assert len(vgnames) == len(names) == 2
9024 port = lu.cfg.AllocatePort()
9025 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9027 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9028 logical_id=(vgnames[0], names[0]),
9030 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9031 size=constants.DRBD_META_SIZE,
9032 logical_id=(vgnames[1], names[1]),
9034 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9035 logical_id=(primary, secondary, port,
9038 children=[dev_data, dev_meta],
9039 iv_name=iv_name, params={})
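# Illustrative sketch of the resulting device tree (hypothetical names/sizes):
#   DRBD8(size=10240,
#         logical_id=(primary, secondary, port, p_minor, s_minor, secret),
#         children=[LV(10240, (data_vg, "<uuid>.disk0_data")),
#                   LV(DRBD_META_SIZE, (meta_vg, "<uuid>.disk0_meta"))])
# i.e. a DRBD device backed by a data LV and a small metadata LV.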
9043 _DISK_TEMPLATE_NAME_PREFIX = {
9044 constants.DT_PLAIN: "",
9045 constants.DT_RBD: ".rbd",
9049 _DISK_TEMPLATE_DEVICE_TYPE = {
9050 constants.DT_PLAIN: constants.LD_LV,
9051 constants.DT_FILE: constants.LD_FILE,
9052 constants.DT_SHARED_FILE: constants.LD_FILE,
9053 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9054 constants.DT_RBD: constants.LD_RBD,
9058 def _GenerateDiskTemplate(
9059 lu, template_name, instance_name, primary_node, secondary_nodes,
9060 disk_info, file_storage_dir, file_driver, base_index,
9061 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9062 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9063 """Generate the entire disk layout for a given template type.
9066 #TODO: compute space requirements
9068 vgname = lu.cfg.GetVGName()
9069 disk_count = len(disk_info)
9072 if template_name == constants.DT_DISKLESS:
9074 elif template_name == constants.DT_DRBD8:
9075 if len(secondary_nodes) != 1:
9076 raise errors.ProgrammerError("Wrong template configuration")
9077 remote_node = secondary_nodes[0]
9078 minors = lu.cfg.AllocateDRBDMinor(
9079 [primary_node, remote_node] * len(disk_info), instance_name)
9081 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9083 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9086 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9087 for i in range(disk_count)]):
9088 names.append(lv_prefix + "_data")
9089 names.append(lv_prefix + "_meta")
9090 for idx, disk in enumerate(disk_info):
9091 disk_index = idx + base_index
9092 data_vg = disk.get(constants.IDISK_VG, vgname)
9093 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9094 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9095 disk[constants.IDISK_SIZE],
9097 names[idx * 2:idx * 2 + 2],
9098 "disk/%d" % disk_index,
9099 minors[idx * 2], minors[idx * 2 + 1])
9100 disk_dev.mode = disk[constants.IDISK_MODE]
9101 disks.append(disk_dev)
9104 raise errors.ProgrammerError("Wrong template configuration")
9106 if template_name == constants.DT_FILE:
9108 elif template_name == constants.DT_SHARED_FILE:
9109 _req_shr_file_storage()
9111 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9112 if name_prefix is None:
9115 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9116 (name_prefix, base_index + i)
9117 for i in range(disk_count)])
9119 if template_name == constants.DT_PLAIN:
9121 def logical_id_fn(idx, _, disk):
9122 vg = disk.get(constants.IDISK_VG, vgname)
9123 return (vg, names[idx])
9125 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9127 lambda _, disk_index, disk: (file_driver,
9128 "%s/disk%d" % (file_storage_dir,
9130 elif template_name == constants.DT_BLOCK:
9132 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9133 disk[constants.IDISK_ADOPT])
9134 elif template_name == constants.DT_RBD:
9135 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9137 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9139 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9141 for idx, disk in enumerate(disk_info):
9142 disk_index = idx + base_index
9143 size = disk[constants.IDISK_SIZE]
9144 feedback_fn("* disk %s, size %s" %
9145 (disk_index, utils.FormatUnit(size, "h")))
9146 disks.append(objects.Disk(dev_type=dev_type, size=size,
9147 logical_id=logical_id_fn(idx, disk_index, disk),
9148 iv_name="disk/%d" % disk_index,
9149 mode=disk[constants.IDISK_MODE],
9155 def _GetInstanceInfoText(instance):
9156 Compute the text that should be added to the disk's metadata.
9159 return "originstname+%s" % instance.name
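# Illustrative example (hypothetical instance name): for "web1.example.com"
# the text attached to its disks is "originstname+web1.example.com".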
9162 def _CalcEta(time_taken, written, total_size):
9163 """Calculates the ETA based on size written and total size.
9165 @param time_taken: The time taken so far
9166 @param written: amount written so far
9167 @param total_size: The total size of data to be written
9168 @return: The remaining time in seconds
9171 avg_time = time_taken / float(written)
9172 return (total_size - written) * avg_time
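# Worked example (hypothetical numbers): if 1024 MiB were written in 30
# seconds out of 4096 MiB total, avg_time = 30 / 1024 ~= 0.0293 s/MiB and the
# ETA is (4096 - 1024) * 0.0293 ~= 90 seconds.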
9175 def _WipeDisks(lu, instance, disks=None):
9176 """Wipes instance disks.
9178 @type lu: L{LogicalUnit}
9179 @param lu: the logical unit on whose behalf we execute
9180 @type instance: L{objects.Instance}
9181 @param instance: the instance whose disks we should wipe
9182 @return: the success of the wipe
9185 node = instance.primary_node
9188 disks = [(idx, disk, 0)
9189 for (idx, disk) in enumerate(instance.disks)]
9191 for (_, device, _) in disks:
9192 lu.cfg.SetDiskID(device, node)
9194 logging.info("Pausing synchronization of disks of instance '%s'",
9196 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9197 (map(compat.snd, disks),
9200 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9202 for idx, success in enumerate(result.payload):
9204 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9205 " failed", idx, instance.name)
9208 for (idx, device, offset) in disks:
9209 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9210 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9212 int(min(constants.MAX_WIPE_CHUNK,
9213 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
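# Worked example (constant values assumed, see constants.py): with
# MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024 MiB, a 5000 MiB disk
# is wiped in chunks of min(1024, 5000 * 0.10) = 500 MiB, while a 100 GiB
# disk is capped at 1024 MiB per chunk.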
9217 start_time = time.time()
9222 info_text = (" (from %s to %s)" %
9223 (utils.FormatUnit(offset, "h"),
9224 utils.FormatUnit(size, "h")))
9226 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9228 logging.info("Wiping disk %d for instance %s on node %s using"
9229 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9231 while offset < size:
9232 wipe_size = min(wipe_chunk_size, size - offset)
9234 logging.debug("Wiping disk %d, offset %s, chunk %s",
9235 idx, offset, wipe_size)
9237 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9239 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9240 (idx, offset, wipe_size))
9244 if now - last_output >= 60:
9245 eta = _CalcEta(now - start_time, offset, size)
9246 lu.LogInfo(" - done: %.1f%% ETA: %s",
9247 offset / float(size) * 100, utils.FormatSeconds(eta))
9250 logging.info("Resuming synchronization of disks for instance '%s'",
9253 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9254 (map(compat.snd, disks),
9259 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9260 node, result.fail_msg)
9262 for idx, success in enumerate(result.payload):
9264 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9265 " failed", idx, instance.name)
9268 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9269 """Create all disks for an instance.
9271 This abstracts away some work from AddInstance.
9273 @type lu: L{LogicalUnit}
9274 @param lu: the logical unit on whose behalf we execute
9275 @type instance: L{objects.Instance}
9276 @param instance: the instance whose disks we should create
9278 @param to_skip: list of indices to skip
9279 @type target_node: string
9280 @param target_node: if passed, overrides the target node for creation
9282 @return: the success of the creation
9285 info = _GetInstanceInfoText(instance)
9286 if target_node is None:
9287 pnode = instance.primary_node
9288 all_nodes = instance.all_nodes
9293 if instance.disk_template in constants.DTS_FILEBASED:
9294 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9295 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9297 result.Raise("Failed to create directory '%s' on"
9298 " node %s" % (file_storage_dir, pnode))
9300 # Note: this needs to be kept in sync with adding of disks in
9301 # LUInstanceSetParams
9302 for idx, device in enumerate(instance.disks):
9303 if to_skip and idx in to_skip:
9305 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9307 for node in all_nodes:
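# Only on the primary node do we force both creation and opening of the
# device (f_create doubles as force_create and force_open); on secondaries
# the device is created only if its type requires it.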
9308 f_create = node == pnode
9309 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9312 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9313 """Remove all disks for an instance.
9315 This abstracts away some work from `AddInstance()` and
9316 `RemoveInstance()`. Note that in case some of the devices couldn't
9317 be removed, the removal will continue with the other ones (compare
9318 with `_CreateDisks()`).
9320 @type lu: L{LogicalUnit}
9321 @param lu: the logical unit on whose behalf we execute
9322 @type instance: L{objects.Instance}
9323 @param instance: the instance whose disks we should remove
9324 @type target_node: string
9325 @param target_node: used to override the node on which to remove the disks
9327 @return: the success of the removal
9330 logging.info("Removing block devices for instance %s", instance.name)
9333 ports_to_release = set()
9334 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9335 for (idx, device) in enumerate(anno_disks):
9337 edata = [(target_node, device)]
9339 edata = device.ComputeNodeTree(instance.primary_node)
9340 for node, disk in edata:
9341 lu.cfg.SetDiskID(disk, node)
9342 result = lu.rpc.call_blockdev_remove(node, disk)
9344 lu.LogWarning("Could not remove disk %s on node %s,"
9345 " continuing anyway: %s", idx, node, result.fail_msg)
9346 if not (result.offline and node != instance.primary_node):
9349 # if this is a DRBD disk, return its port to the pool
9350 if device.dev_type in constants.LDS_DRBD:
9351 ports_to_release.add(device.logical_id[2])
9353 if all_result or ignore_failures:
9354 for port in ports_to_release:
9355 lu.cfg.AddTcpUdpPort(port)
9357 if instance.disk_template in constants.DTS_FILEBASED:
9358 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9362 tgt = instance.primary_node
9363 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9365 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9366 file_storage_dir, instance.primary_node, result.fail_msg)
9372 def _ComputeDiskSizePerVG(disk_template, disks):
9373 """Compute disk size requirements in the volume group
9376 def _compute(disks, payload):
9377 """Universal algorithm: per volume group, sum the disk sizes plus a per-disk payload.
9382 vgs[disk[constants.IDISK_VG]] = \
9383 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9387 # Required free disk space as a function of disk and swap space
9389 constants.DT_DISKLESS: {},
9390 constants.DT_PLAIN: _compute(disks, 0),
9391 # 128 MB are added for drbd metadata for each disk
9392 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9393 constants.DT_FILE: {},
9394 constants.DT_SHARED_FILE: {},
9397 if disk_template not in req_size_dict:
9398 raise errors.ProgrammerError("Disk template '%s' size requirement"
9399 " is unknown" % disk_template)
9401 return req_size_dict[disk_template]
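# A minimal sketch of the expected result, assuming two 10240 MiB disks in a
# volume group named "xenvg" (the VG name is illustrative only):
#   _ComputeDiskSizePerVG(constants.DT_PLAIN, disks)    -> {"xenvg": 20480}
#   _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)    -> {"xenvg": 20480 +
#                                                           2 * DRBD_META_SIZE}
#   _ComputeDiskSizePerVG(constants.DT_DISKLESS, disks) -> {}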
9404 def _FilterVmNodes(lu, nodenames):
9405 """Filters out non-vm_capable nodes from a list.
9407 @type lu: L{LogicalUnit}
9408 @param lu: the logical unit for which we check
9409 @type nodenames: list
9410 @param nodenames: the list of nodes on which we should check
9412 @return: the list of vm-capable nodes
9415 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9416 return [name for name in nodenames if name not in non_vm_nodes]
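# Hypothetical example: if "node2.example.com" is marked non-vm_capable in the
# configuration, _FilterVmNodes(lu, ["node1.example.com", "node2.example.com"])
# returns ["node1.example.com"].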
9419 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9420 """Hypervisor parameter validation.
9422 This function abstracts the hypervisor parameter validation to be
9423 used in both instance create and instance modify.
9425 @type lu: L{LogicalUnit}
9426 @param lu: the logical unit for which we check
9427 @type nodenames: list
9428 @param nodenames: the list of nodes on which we should check
9429 @type hvname: string
9430 @param hvname: the name of the hypervisor we should use
9431 @type hvparams: dict
9432 @param hvparams: the parameters which we need to check
9433 @raise errors.OpPrereqError: if the parameters are not valid
9436 nodenames = _FilterVmNodes(lu, nodenames)
9438 cluster = lu.cfg.GetClusterInfo()
9439 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9441 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9442 for node in nodenames:
9446 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9449 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9450 """OS parameters validation.
9452 @type lu: L{LogicalUnit}
9453 @param lu: the logical unit for which we check
9454 @type required: boolean
9455 @param required: whether the validation should fail if the OS is not found
9457 @type nodenames: list
9458 @param nodenames: the list of nodes on which we should check
9459 @type osname: string
9460 @param osname: the name of the OS we should use
9461 @type osparams: dict
9462 @param osparams: the parameters which we need to check
9463 @raise errors.OpPrereqError: if the parameters are not valid
9466 nodenames = _FilterVmNodes(lu, nodenames)
9467 result = lu.rpc.call_os_validate(nodenames, required, osname,
9468 [constants.OS_VALIDATE_PARAMETERS],
9470 for node, nres in result.items():
9471 # we don't check for offline cases since this should be run only
9472 # against the master node and/or an instance's nodes
9473 nres.Raise("OS Parameters validation failed on node %s" % node)
9474 if not nres.payload:
9475 lu.LogInfo("OS %s not found on node %s, validation skipped",
9479 def _CreateInstanceAllocRequest(op, disks, nics, beparams):
9480 """Wrapper around IAReqInstanceAlloc.
9482 @param op: The instance opcode
9483 @param disks: The computed disks
9484 @param nics: The computed nics
9485 @param beparams: The fully filled beparams
9487 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9490 spindle_use = beparams[constants.BE_SPINDLE_USE]
9491 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9492 disk_template=op.disk_template,
9495 vcpus=beparams[constants.BE_VCPUS],
9496 memory=beparams[constants.BE_MAXMEM],
9497 spindle_use=spindle_use,
9499 nics=[n.ToDict() for n in nics],
9500 hypervisor=op.hypervisor)
9503 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9504 """Computes the nics.
9506 @param op: The instance opcode
9507 @param cluster: Cluster configuration object
9508 @param default_ip: The default ip to assign
9509 @param cfg: An instance of the configuration object
9510 @param ec_id: Execution context ID
9512 @returns: The built-up NICs
9517 nic_mode_req = nic.get(constants.INIC_MODE, None)
9518 nic_mode = nic_mode_req
9519 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9520 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9522 net = nic.get(constants.INIC_NETWORK, None)
9523 link = nic.get(constants.NIC_LINK, None)
9524 ip = nic.get(constants.INIC_IP, None)
9526 if net is None or net.lower() == constants.VALUE_NONE:
9529 if nic_mode_req is not None or link is not None:
9530 raise errors.OpPrereqError("If network is given, no mode or link"
9531 " is allowed to be passed",
9534 # ip validity checks
9535 if ip is None or ip.lower() == constants.VALUE_NONE:
9537 elif ip.lower() == constants.VALUE_AUTO:
9538 if not op.name_check:
9539 raise errors.OpPrereqError("IP address set to auto but name checks"
9540 " have been skipped",
9544 # We defer pool operations until later, so that the iallocator has
9545 # filled in the instance's node(s)
9546 if ip.lower() == constants.NIC_IP_POOL:
9548 raise errors.OpPrereqError("if ip=pool, parameter network"
9549 " must be passed too",
9552 elif not netutils.IPAddress.IsValid(ip):
9553 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9558 # TODO: check the ip address for uniqueness
9559 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9560 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9563 # MAC address verification
9564 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9565 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9566 mac = utils.NormalizeAndValidateMac(mac)
9569 # TODO: We need to factor this out
9570 cfg.ReserveMAC(mac, ec_id)
9571 except errors.ReservationError:
9572 raise errors.OpPrereqError("MAC address %s already in use"
9573 " in cluster" % mac,
9574 errors.ECODE_NOTUNIQUE)
9576 # Build nic parameters
9579 nicparams[constants.NIC_MODE] = nic_mode
9581 nicparams[constants.NIC_LINK] = link
9583 check_params = cluster.SimpleFillNIC(nicparams)
9584 objects.NIC.CheckParameterSyntax(check_params)
9585 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9586 network=net, nicparams=nicparams))
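# Hedged sketch of the resulting object (values illustrative): an opcode NIC of
# {constants.INIC_MODE: "bridged", constants.INIC_LINK: "br0"} with no IP and
# no MAC yields objects.NIC(mac="auto", ip=None, network=None,
# nicparams={mode: "bridged", link: "br0"}); the real MAC address is generated
# later, once the overrides have been syntax-checked against the cluster
# default nicparams.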
9591 def _ComputeDisks(op, default_vg):
9592 """Computes the instance disks.
9594 @param op: The instance opcode
9595 @param default_vg: The default_vg to assume
9597 @return: The computed disks
9601 for disk in op.disks:
9602 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9603 if mode not in constants.DISK_ACCESS_SET:
9604 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9605 mode, errors.ECODE_INVAL)
9606 size = disk.get(constants.IDISK_SIZE, None)
9608 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9611 except (TypeError, ValueError):
9612 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9615 data_vg = disk.get(constants.IDISK_VG, default_vg)
9617 constants.IDISK_SIZE: size,
9618 constants.IDISK_MODE: mode,
9619 constants.IDISK_VG: data_vg,
9621 if constants.IDISK_METAVG in disk:
9622 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9623 if constants.IDISK_ADOPT in disk:
9624 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9625 disks.append(new_disk)
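# Small illustration of the transformation (the default_vg "xenvg" is
# hypothetical): an opcode disk {constants.IDISK_SIZE: 10240} becomes
#   {constants.IDISK_SIZE: 10240,
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg"}
# while explicit metavg/adopt keys, when present, are carried over unchanged.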
9630 def _ComputeFullBeParams(op, cluster):
9631 """Computes the full beparams.
9633 @param op: The instance opcode
9634 @param cluster: The cluster config object
9636 @return: The fully filled beparams
9639 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9640 for param, value in op.beparams.iteritems():
9641 if value == constants.VALUE_AUTO:
9642 op.beparams[param] = default_beparams[param]
9643 objects.UpgradeBeParams(op.beparams)
9644 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9645 return cluster.SimpleFillBE(op.beparams)
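# Illustrative behaviour, assuming nothing about the concrete cluster defaults:
# an op.beparams of {constants.BE_VCPUS: constants.VALUE_AUTO} first has the
# "auto" value replaced by the cluster default, any legacy "memory" setting is
# upgraded to minmem/maxmem via UpgradeBeParams, and the returned dict is the
# cluster-filled set containing every backend parameter.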
9648 class LUInstanceCreate(LogicalUnit):
9649 """Create an instance.
9652 HPATH = "instance-add"
9653 HTYPE = constants.HTYPE_INSTANCE
9656 def CheckArguments(self):
9660 # do not require name_check to ease forward/backward compatibility
9662 if self.op.no_install and self.op.start:
9663 self.LogInfo("No-installation mode selected, disabling startup")
9664 self.op.start = False
9665 # validate/normalize the instance name
9666 self.op.instance_name = \
9667 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9669 if self.op.ip_check and not self.op.name_check:
9670 # TODO: make the ip check more flexible and not depend on the name check
9671 raise errors.OpPrereqError("Cannot do IP address check without a name"
9672 " check", errors.ECODE_INVAL)
9674 # check nics' parameter names
9675 for nic in self.op.nics:
9676 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9678 # check disks: parameter names and consistent adopt/no-adopt strategy
9679 has_adopt = has_no_adopt = False
9680 for disk in self.op.disks:
9681 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9682 if constants.IDISK_ADOPT in disk:
9686 if has_adopt and has_no_adopt:
9687 raise errors.OpPrereqError("Either all disks are adopted or none is",
9690 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9691 raise errors.OpPrereqError("Disk adoption is not supported for the"
9692 " '%s' disk template" %
9693 self.op.disk_template,
9695 if self.op.iallocator is not None:
9696 raise errors.OpPrereqError("Disk adoption not allowed with an"
9697 " iallocator script", errors.ECODE_INVAL)
9698 if self.op.mode == constants.INSTANCE_IMPORT:
9699 raise errors.OpPrereqError("Disk adoption not allowed for"
9700 " instance import", errors.ECODE_INVAL)
9702 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9703 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9704 " but no 'adopt' parameter given" %
9705 self.op.disk_template,
9708 self.adopt_disks = has_adopt
9710 # instance name verification
9711 if self.op.name_check:
9712 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9713 self.op.instance_name = self.hostname1.name
9714 # used in CheckPrereq for ip ping check
9715 self.check_ip = self.hostname1.ip
9717 self.check_ip = None
9719 # file storage checks
9720 if (self.op.file_driver and
9721 not self.op.file_driver in constants.FILE_DRIVER):
9722 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9723 self.op.file_driver, errors.ECODE_INVAL)
9725 if self.op.disk_template == constants.DT_FILE:
9726 opcodes.RequireFileStorage()
9727 elif self.op.disk_template == constants.DT_SHARED_FILE:
9728 opcodes.RequireSharedFileStorage()
9730 ### Node/iallocator related checks
9731 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9733 if self.op.pnode is not None:
9734 if self.op.disk_template in constants.DTS_INT_MIRROR:
9735 if self.op.snode is None:
9736 raise errors.OpPrereqError("The networked disk templates need"
9737 " a mirror node", errors.ECODE_INVAL)
9739 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9741 self.op.snode = None
9743 self._cds = _GetClusterDomainSecret()
9745 if self.op.mode == constants.INSTANCE_IMPORT:
9746 # On import force_variant must be True, because if we forced it at
9747 # initial install, our only chance when importing it back is that it
9749 self.op.force_variant = True
9751 if self.op.no_install:
9752 self.LogInfo("No-installation mode has no effect during import")
9754 elif self.op.mode == constants.INSTANCE_CREATE:
9755 if self.op.os_type is None:
9756 raise errors.OpPrereqError("No guest OS specified",
9758 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9759 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9760 " installation" % self.op.os_type,
9762 if self.op.disk_template is None:
9763 raise errors.OpPrereqError("No disk template specified",
9766 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9767 # Check handshake to ensure both clusters have the same domain secret
9768 src_handshake = self.op.source_handshake
9769 if not src_handshake:
9770 raise errors.OpPrereqError("Missing source handshake",
9773 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9776 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9779 # Load and check source CA
9780 self.source_x509_ca_pem = self.op.source_x509_ca
9781 if not self.source_x509_ca_pem:
9782 raise errors.OpPrereqError("Missing source X509 CA",
9786 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9788 except OpenSSL.crypto.Error, err:
9789 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9790 (err, ), errors.ECODE_INVAL)
9792 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9793 if errcode is not None:
9794 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9797 self.source_x509_ca = cert
9799 src_instance_name = self.op.source_instance_name
9800 if not src_instance_name:
9801 raise errors.OpPrereqError("Missing source instance name",
9804 self.source_instance_name = \
9805 netutils.GetHostname(name=src_instance_name).name
9808 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9809 self.op.mode, errors.ECODE_INVAL)
9811 def ExpandNames(self):
9812 """ExpandNames for CreateInstance.
9814 Figure out the right locks for instance creation.
9817 self.needed_locks = {}
9819 instance_name = self.op.instance_name
9820 # this is just a preventive check, but someone might still add this
9821 # instance in the meantime, and creation will fail at lock-add time
9822 if instance_name in self.cfg.GetInstanceList():
9823 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9824 instance_name, errors.ECODE_EXISTS)
9826 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9828 if self.op.iallocator:
9829 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9830 # specifying a group on instance creation and then selecting nodes from
9832 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9833 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9835 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9836 nodelist = [self.op.pnode]
9837 if self.op.snode is not None:
9838 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9839 nodelist.append(self.op.snode)
9840 self.needed_locks[locking.LEVEL_NODE] = nodelist
9841 # Lock resources of instance's primary and secondary nodes (copy to
9842 # prevent accidental modification)
9843 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9845 # in case of import lock the source node too
9846 if self.op.mode == constants.INSTANCE_IMPORT:
9847 src_node = self.op.src_node
9848 src_path = self.op.src_path
9850 if src_path is None:
9851 self.op.src_path = src_path = self.op.instance_name
9853 if src_node is None:
9854 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9855 self.op.src_node = None
9856 if os.path.isabs(src_path):
9857 raise errors.OpPrereqError("Importing an instance from a path"
9858 " requires a source node option",
9861 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9862 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9863 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9864 if not os.path.isabs(src_path):
9865 self.op.src_path = src_path = \
9866 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9868 def _RunAllocator(self):
9869 """Run the allocator based on input opcode.
9872 #TODO Export network to iallocator so that it chooses a pnode
9873 # in a nodegroup that has the desired network connected to
9874 req = _CreateInstanceAllocRequest(self.op, self.disks,
9875 self.nics, self.be_full)
9876 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9878 ial.Run(self.op.iallocator)
9881 raise errors.OpPrereqError("Can't compute nodes using"
9882 " iallocator '%s': %s" %
9883 (self.op.iallocator, ial.info),
9885 self.op.pnode = ial.result[0]
9886 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9887 self.op.instance_name, self.op.iallocator,
9888 utils.CommaJoin(ial.result))
9890 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9892 if req.RequiredNodes() == 2:
9893 self.op.snode = ial.result[1]
9895 def BuildHooksEnv(self):
9898 This runs on master, primary and secondary nodes of the instance.
9902 "ADD_MODE": self.op.mode,
9904 if self.op.mode == constants.INSTANCE_IMPORT:
9905 env["SRC_NODE"] = self.op.src_node
9906 env["SRC_PATH"] = self.op.src_path
9907 env["SRC_IMAGES"] = self.src_images
9909 env.update(_BuildInstanceHookEnv(
9910 name=self.op.instance_name,
9911 primary_node=self.op.pnode,
9912 secondary_nodes=self.secondaries,
9913 status=self.op.start,
9914 os_type=self.op.os_type,
9915 minmem=self.be_full[constants.BE_MINMEM],
9916 maxmem=self.be_full[constants.BE_MAXMEM],
9917 vcpus=self.be_full[constants.BE_VCPUS],
9918 nics=_NICListToTuple(self, self.nics),
9919 disk_template=self.op.disk_template,
9920 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9921 for d in self.disks],
9924 hypervisor_name=self.op.hypervisor,
9930 def BuildHooksNodes(self):
9931 """Build hooks nodes.
9934 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9937 def _ReadExportInfo(self):
9938 """Reads the export information from disk.
9940 It will override the opcode source node and path with the actual
9941 information, if these two were not specified before.
9943 @return: the export information
9946 assert self.op.mode == constants.INSTANCE_IMPORT
9948 src_node = self.op.src_node
9949 src_path = self.op.src_path
9951 if src_node is None:
9952 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9953 exp_list = self.rpc.call_export_list(locked_nodes)
9955 for node in exp_list:
9956 if exp_list[node].fail_msg:
9958 if src_path in exp_list[node].payload:
9960 self.op.src_node = src_node = node
9961 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
9965 raise errors.OpPrereqError("No export found for relative path %s" %
9966 src_path, errors.ECODE_INVAL)
9968 _CheckNodeOnline(self, src_node)
9969 result = self.rpc.call_export_info(src_node, src_path)
9970 result.Raise("No export or invalid export found in dir %s" % src_path)
9972 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9973 if not export_info.has_section(constants.INISECT_EXP):
9974 raise errors.ProgrammerError("Corrupted export config",
9975 errors.ECODE_ENVIRON)
9977 ei_version = export_info.get(constants.INISECT_EXP, "version")
9978 if (int(ei_version) != constants.EXPORT_VERSION):
9979 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9980 (ei_version, constants.EXPORT_VERSION),
9981 errors.ECODE_ENVIRON)
9984 def _ReadExportParams(self, einfo):
9985 """Use export parameters as defaults.
9987 In case the opcode doesn't specify (i.e. override) some instance
9988 parameters, try to use them from the export information, if it declares them.
9992 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9994 if self.op.disk_template is None:
9995 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9996 self.op.disk_template = einfo.get(constants.INISECT_INS,
9998 if self.op.disk_template not in constants.DISK_TEMPLATES:
9999 raise errors.OpPrereqError("Disk template specified in configuration"
10000 " file is not one of the allowed values:"
10002 " ".join(constants.DISK_TEMPLATES),
10003 errors.ECODE_INVAL)
10005 raise errors.OpPrereqError("No disk template specified and the export"
10006 " is missing the disk_template information",
10007 errors.ECODE_INVAL)
10009 if not self.op.disks:
10011 # TODO: import the disk iv_name too
10012 for idx in range(constants.MAX_DISKS):
10013 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10014 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10015 disks.append({constants.IDISK_SIZE: disk_sz})
10016 self.op.disks = disks
10017 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10018 raise errors.OpPrereqError("No disk info specified and the export"
10019 " is missing the disk information",
10020 errors.ECODE_INVAL)
10022 if not self.op.nics:
10024 for idx in range(constants.MAX_NICS):
10025 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10027 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10028 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10033 self.op.nics = nics
10035 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10036 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10038 if (self.op.hypervisor is None and
10039 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10040 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10042 if einfo.has_section(constants.INISECT_HYP):
10043 # use the export parameters but do not override the ones
10044 # specified by the user
10045 for name, value in einfo.items(constants.INISECT_HYP):
10046 if name not in self.op.hvparams:
10047 self.op.hvparams[name] = value
10049 if einfo.has_section(constants.INISECT_BEP):
10050 # use the parameters, without overriding
10051 for name, value in einfo.items(constants.INISECT_BEP):
10052 if name not in self.op.beparams:
10053 self.op.beparams[name] = value
10054 # Compatibility for the old "memory" be param
10055 if name == constants.BE_MEMORY:
10056 if constants.BE_MAXMEM not in self.op.beparams:
10057 self.op.beparams[constants.BE_MAXMEM] = value
10058 if constants.BE_MINMEM not in self.op.beparams:
10059 self.op.beparams[constants.BE_MINMEM] = value
10061 # try to read the parameters old style, from the main section
10062 for name in constants.BES_PARAMETERS:
10063 if (name not in self.op.beparams and
10064 einfo.has_option(constants.INISECT_INS, name)):
10065 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10067 if einfo.has_section(constants.INISECT_OSP):
10068 # use the parameters, without overriding
10069 for name, value in einfo.items(constants.INISECT_OSP):
10070 if name not in self.op.osparams:
10071 self.op.osparams[name] = value
10073 def _RevertToDefaults(self, cluster):
10074 """Revert the instance parameters to the default values.
10078 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10079 for name in self.op.hvparams.keys():
10080 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10081 del self.op.hvparams[name]
10083 be_defs = cluster.SimpleFillBE({})
10084 for name in self.op.beparams.keys():
10085 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10086 del self.op.beparams[name]
10088 nic_defs = cluster.SimpleFillNIC({})
10089 for nic in self.op.nics:
10090 for name in constants.NICS_PARAMETERS:
10091 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10094 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10095 for name in self.op.osparams.keys():
10096 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10097 del self.op.osparams[name]
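# In other words (illustrative): a hypervisor parameter passed with a value
# identical to the cluster default (say, an explicitly repeated kernel path) is
# dropped from the opcode, so the new instance keeps tracking the cluster-wide
# default instead of pinning the current value.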
10099 def _CalculateFileStorageDir(self):
10100 """Calculate final instance file storage dir.
10103 # file storage dir calculation/check
10104 self.instance_file_storage_dir = None
10105 if self.op.disk_template in constants.DTS_FILEBASED:
10106 # build the full file storage dir path
10109 if self.op.disk_template == constants.DT_SHARED_FILE:
10110 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10112 get_fsd_fn = self.cfg.GetFileStorageDir
10114 cfg_storagedir = get_fsd_fn()
10115 if not cfg_storagedir:
10116 raise errors.OpPrereqError("Cluster file storage dir not defined",
10117 errors.ECODE_STATE)
10118 joinargs.append(cfg_storagedir)
10120 if self.op.file_storage_dir is not None:
10121 joinargs.append(self.op.file_storage_dir)
10123 joinargs.append(self.op.instance_name)
10125 # pylint: disable=W0142
10126 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
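# Hypothetical example: with a cluster file storage dir of
# "/srv/ganeti/file-storage", op.file_storage_dir = "web" and instance name
# "inst1.example.com", the computed directory would be
# "/srv/ganeti/file-storage/web/inst1.example.com".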
10128 def CheckPrereq(self): # pylint: disable=R0914
10129 """Check prerequisites.
10132 self._CalculateFileStorageDir()
10134 if self.op.mode == constants.INSTANCE_IMPORT:
10135 export_info = self._ReadExportInfo()
10136 self._ReadExportParams(export_info)
10137 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10139 self._old_instance_name = None
10141 if (not self.cfg.GetVGName() and
10142 self.op.disk_template not in constants.DTS_NOT_LVM):
10143 raise errors.OpPrereqError("Cluster does not support lvm-based"
10144 " instances", errors.ECODE_STATE)
10146 if (self.op.hypervisor is None or
10147 self.op.hypervisor == constants.VALUE_AUTO):
10148 self.op.hypervisor = self.cfg.GetHypervisorType()
10150 cluster = self.cfg.GetClusterInfo()
10151 enabled_hvs = cluster.enabled_hypervisors
10152 if self.op.hypervisor not in enabled_hvs:
10153 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10155 (self.op.hypervisor, ",".join(enabled_hvs)),
10156 errors.ECODE_STATE)
10158 # Check tag validity
10159 for tag in self.op.tags:
10160 objects.TaggableObject.ValidateTag(tag)
10162 # check hypervisor parameter syntax (locally)
10163 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10164 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10166 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10167 hv_type.CheckParameterSyntax(filled_hvp)
10168 self.hv_full = filled_hvp
10169 # check that we don't specify global parameters on an instance
10170 _CheckGlobalHvParams(self.op.hvparams)
10172 # fill and remember the beparams dict
10173 self.be_full = _ComputeFullBeParams(self.op, cluster)
10175 # build os parameters
10176 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10178 # now that hvp/bep are in final format, let's reset to defaults,
10180 if self.op.identify_defaults:
10181 self._RevertToDefaults(cluster)
10184 self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
10185 self.proc.GetECId())
10187 # disk checks/pre-build
10188 default_vg = self.cfg.GetVGName()
10189 self.disks = _ComputeDisks(self.op, default_vg)
10191 if self.op.mode == constants.INSTANCE_IMPORT:
10193 for idx in range(len(self.disks)):
10194 option = "disk%d_dump" % idx
10195 if export_info.has_option(constants.INISECT_INS, option):
10196 # FIXME: are the old os-es, disk sizes, etc. useful?
10197 export_name = export_info.get(constants.INISECT_INS, option)
10198 image = utils.PathJoin(self.op.src_path, export_name)
10199 disk_images.append(image)
10201 disk_images.append(False)
10203 self.src_images = disk_images
10205 if self.op.instance_name == self._old_instance_name:
10206 for idx, nic in enumerate(self.nics):
10207 if nic.mac == constants.VALUE_AUTO:
10208 nic_mac_ini = "nic%d_mac" % idx
10209 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10211 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10213 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10214 if self.op.ip_check:
10215 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10216 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10217 (self.check_ip, self.op.instance_name),
10218 errors.ECODE_NOTUNIQUE)
10220 #### mac address generation
10221 # By generating here the mac address both the allocator and the hooks get
10222 # the real final mac address rather than the 'auto' or 'generate' value.
10223 # There is a race condition between the generation and the instance object
10224 # creation, which means that we know the mac is valid now, but we're not
10225 # sure it will be when we actually add the instance. If things go bad
10226 # adding the instance will abort because of a duplicate mac, and the
10227 # creation job will fail.
10228 for nic in self.nics:
10229 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10230 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10234 if self.op.iallocator is not None:
10235 self._RunAllocator()
10237 # Release all unneeded node locks
10238 _ReleaseLocks(self, locking.LEVEL_NODE,
10239 keep=filter(None, [self.op.pnode, self.op.snode,
10240 self.op.src_node]))
10241 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10242 keep=filter(None, [self.op.pnode, self.op.snode,
10243 self.op.src_node]))
10245 #### node related checks
10247 # check primary node
10248 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10249 assert self.pnode is not None, \
10250 "Cannot retrieve locked node %s" % self.op.pnode
10252 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10253 pnode.name, errors.ECODE_STATE)
10255 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10256 pnode.name, errors.ECODE_STATE)
10257 if not pnode.vm_capable:
10258 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10259 " '%s'" % pnode.name, errors.ECODE_STATE)
10261 self.secondaries = []
10263 # Fill in any IPs from IP pools. This must happen here, because we need to
10264 # know the nic's primary node, as specified by the iallocator
10265 for idx, nic in enumerate(self.nics):
10267 if net is not None:
10268 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10269 if netparams is None:
10270 raise errors.OpPrereqError("No netparams found for network"
10271 " %s. Probably not connected to"
10272 " node %s's nodegroup" %
10273 (net, self.pnode.name),
10274 errors.ECODE_INVAL)
10275 self.LogInfo("NIC/%d inherits netparams %s" %
10276 (idx, netparams.values()))
10277 nic.nicparams = dict(netparams)
10278 if nic.ip is not None:
10279 if nic.ip.lower() == constants.NIC_IP_POOL:
10281 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10282 except errors.ReservationError:
10283 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10284 " from the address pool" % idx,
10285 errors.ECODE_STATE)
10286 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10289 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10290 except errors.ReservationError:
10291 raise errors.OpPrereqError("IP address %s already in use"
10292 " or does not belong to network %s" %
10294 errors.ECODE_NOTUNIQUE)
10296 # net is None, ip None or given
10297 if self.op.conflicts_check:
10298 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10300 # mirror node verification
10301 if self.op.disk_template in constants.DTS_INT_MIRROR:
10302 if self.op.snode == pnode.name:
10303 raise errors.OpPrereqError("The secondary node cannot be the"
10304 " primary node", errors.ECODE_INVAL)
10305 _CheckNodeOnline(self, self.op.snode)
10306 _CheckNodeNotDrained(self, self.op.snode)
10307 _CheckNodeVmCapable(self, self.op.snode)
10308 self.secondaries.append(self.op.snode)
10310 snode = self.cfg.GetNodeInfo(self.op.snode)
10311 if pnode.group != snode.group:
10312 self.LogWarning("The primary and secondary nodes are in two"
10313 " different node groups; the disk parameters"
10314 " from the first disk's node group will be used")
10317 nodenames = [pnode.name] + self.secondaries
10319 # Verify instance specs
10320 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10322 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10323 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10324 constants.ISPEC_DISK_COUNT: len(self.disks),
10325 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10326 constants.ISPEC_NIC_COUNT: len(self.nics),
10327 constants.ISPEC_SPINDLE_USE: spindle_use,
10330 group_info = self.cfg.GetNodeGroup(pnode.group)
10331 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10332 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10333 if not self.op.ignore_ipolicy and res:
10334 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10335 (pnode.group, group_info.name, utils.CommaJoin(res)))
10336 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10338 if not self.adopt_disks:
10339 if self.op.disk_template == constants.DT_RBD:
10340 # _CheckRADOSFreeSpace() is just a placeholder.
10341 # Any function that checks prerequisites can be placed here.
10342 # Check if there is enough space on the RADOS cluster.
10343 _CheckRADOSFreeSpace()
10345 # Check lv size requirements, if not adopting
10346 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10347 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10349 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10350 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10351 disk[constants.IDISK_ADOPT])
10352 for disk in self.disks])
10353 if len(all_lvs) != len(self.disks):
10354 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10355 errors.ECODE_INVAL)
10356 for lv_name in all_lvs:
10358 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10359 # to ReserveLV use the same syntax
10360 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10361 except errors.ReservationError:
10362 raise errors.OpPrereqError("LV named %s used by another instance" %
10363 lv_name, errors.ECODE_NOTUNIQUE)
10365 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10366 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10368 node_lvs = self.rpc.call_lv_list([pnode.name],
10369 vg_names.payload.keys())[pnode.name]
10370 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10371 node_lvs = node_lvs.payload
10373 delta = all_lvs.difference(node_lvs.keys())
10375 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10376 utils.CommaJoin(delta),
10377 errors.ECODE_INVAL)
10378 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10380 raise errors.OpPrereqError("Online logical volumes found, cannot"
10381 " adopt: %s" % utils.CommaJoin(online_lvs),
10382 errors.ECODE_STATE)
10383 # update the size of disk based on what is found
10384 for dsk in self.disks:
10385 dsk[constants.IDISK_SIZE] = \
10386 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10387 dsk[constants.IDISK_ADOPT])][0]))
10389 elif self.op.disk_template == constants.DT_BLOCK:
10390 # Normalize and de-duplicate device paths
10391 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10392 for disk in self.disks])
10393 if len(all_disks) != len(self.disks):
10394 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10395 errors.ECODE_INVAL)
10396 baddisks = [d for d in all_disks
10397 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10399 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10400 " cannot be adopted" %
10401 (utils.CommaJoin(baddisks),
10402 constants.ADOPTABLE_BLOCKDEV_ROOT),
10403 errors.ECODE_INVAL)
10405 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10406 list(all_disks))[pnode.name]
10407 node_disks.Raise("Cannot get block device information from node %s" %
10409 node_disks = node_disks.payload
10410 delta = all_disks.difference(node_disks.keys())
10412 raise errors.OpPrereqError("Missing block device(s): %s" %
10413 utils.CommaJoin(delta),
10414 errors.ECODE_INVAL)
10415 for dsk in self.disks:
10416 dsk[constants.IDISK_SIZE] = \
10417 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10419 # Verify instance specs
10420 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10422 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10423 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10424 constants.ISPEC_DISK_COUNT: len(self.disks),
10425 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10426 for disk in self.disks],
10427 constants.ISPEC_NIC_COUNT: len(self.nics),
10428 constants.ISPEC_SPINDLE_USE: spindle_use,
10431 group_info = self.cfg.GetNodeGroup(pnode.group)
10432 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10433 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10434 if not self.op.ignore_ipolicy and res:
10435 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10436 " policy: %s") % (pnode.group,
10437 utils.CommaJoin(res)),
10438 errors.ECODE_INVAL)
10440 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10442 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10443 # check OS parameters (remotely)
10444 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10446 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10448 # memory check on primary node
10449 #TODO(dynmem): use MINMEM for checking
10451 _CheckNodeFreeMemory(self, self.pnode.name,
10452 "creating instance %s" % self.op.instance_name,
10453 self.be_full[constants.BE_MAXMEM],
10454 self.op.hypervisor)
10456 self.dry_run_result = list(nodenames)
10458 def Exec(self, feedback_fn):
10459 """Create and add the instance to the cluster.
10462 instance = self.op.instance_name
10463 pnode_name = self.pnode.name
10465 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10466 self.owned_locks(locking.LEVEL_NODE)), \
10467 "Node locks differ from node resource locks"
10469 ht_kind = self.op.hypervisor
10470 if ht_kind in constants.HTS_REQ_PORT:
10471 network_port = self.cfg.AllocatePort()
10473 network_port = None
10475 # This is ugly, but we have a chicken-and-egg problem here
10476 # We can only take the group disk parameters, as the instance
10477 # has no disks yet (we are generating them right here).
10478 node = self.cfg.GetNodeInfo(pnode_name)
10479 nodegroup = self.cfg.GetNodeGroup(node.group)
10480 disks = _GenerateDiskTemplate(self,
10481 self.op.disk_template,
10482 instance, pnode_name,
10485 self.instance_file_storage_dir,
10486 self.op.file_driver,
10489 self.cfg.GetGroupDiskParams(nodegroup))
10491 iobj = objects.Instance(name=instance, os=self.op.os_type,
10492 primary_node=pnode_name,
10493 nics=self.nics, disks=disks,
10494 disk_template=self.op.disk_template,
10495 admin_state=constants.ADMINST_DOWN,
10496 network_port=network_port,
10497 beparams=self.op.beparams,
10498 hvparams=self.op.hvparams,
10499 hypervisor=self.op.hypervisor,
10500 osparams=self.op.osparams,
10504 for tag in self.op.tags:
10507 if self.adopt_disks:
10508 if self.op.disk_template == constants.DT_PLAIN:
10509 # rename LVs to the newly-generated names; we need to construct
10510 # 'fake' LV disks with the old data, plus the new unique_id
10511 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10513 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10514 rename_to.append(t_dsk.logical_id)
10515 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10516 self.cfg.SetDiskID(t_dsk, pnode_name)
10517 result = self.rpc.call_blockdev_rename(pnode_name,
10518 zip(tmp_disks, rename_to))
10519 result.Raise("Failed to rename adopted LVs")
10521 feedback_fn("* creating instance disks...")
10523 _CreateDisks(self, iobj)
10524 except errors.OpExecError:
10525 self.LogWarning("Device creation failed, reverting...")
10527 _RemoveDisks(self, iobj)
10529 self.cfg.ReleaseDRBDMinors(instance)
10532 feedback_fn("adding instance %s to cluster config" % instance)
10534 self.cfg.AddInstance(iobj, self.proc.GetECId())
10536 # Declare that we don't want to remove the instance lock anymore, as we've
10537 # added the instance to the config
10538 del self.remove_locks[locking.LEVEL_INSTANCE]
10540 if self.op.mode == constants.INSTANCE_IMPORT:
10541 # Release unused nodes
10542 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10544 # Release all nodes
10545 _ReleaseLocks(self, locking.LEVEL_NODE)
10548 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10549 feedback_fn("* wiping instance disks...")
10551 _WipeDisks(self, iobj)
10552 except errors.OpExecError, err:
10553 logging.exception("Wiping disks failed")
10554 self.LogWarning("Wiping instance disks failed (%s)", err)
10558 # Something is already wrong with the disks, don't do anything else
10560 elif self.op.wait_for_sync:
10561 disk_abort = not _WaitForSync(self, iobj)
10562 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10563 # make sure the disks are not degraded (still sync-ing is ok)
10564 feedback_fn("* checking mirrors status")
10565 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10570 _RemoveDisks(self, iobj)
10571 self.cfg.RemoveInstance(iobj.name)
10572 # Make sure the instance lock gets removed
10573 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10574 raise errors.OpExecError("There are some degraded disks for"
10577 # Release all node resource locks
10578 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10580 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10581 # we need to set the disks ID to the primary node, since the
10582 # preceding code might or might not have done it, depending on
10583 # disk template and other options
10584 for disk in iobj.disks:
10585 self.cfg.SetDiskID(disk, pnode_name)
10586 if self.op.mode == constants.INSTANCE_CREATE:
10587 if not self.op.no_install:
10588 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10589 not self.op.wait_for_sync)
10591 feedback_fn("* pausing disk sync to install instance OS")
10592 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10595 for idx, success in enumerate(result.payload):
10597 logging.warn("pause-sync of instance %s for disk %d failed",
10600 feedback_fn("* running the instance OS create scripts...")
10601 # FIXME: pass debug option from opcode to backend
10603 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10604 self.op.debug_level)
10606 feedback_fn("* resuming disk sync")
10607 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10610 for idx, success in enumerate(result.payload):
10612 logging.warn("resume-sync of instance %s for disk %d failed",
10615 os_add_result.Raise("Could not add os for instance %s"
10616 " on node %s" % (instance, pnode_name))
10619 if self.op.mode == constants.INSTANCE_IMPORT:
10620 feedback_fn("* running the instance OS import scripts...")
10624 for idx, image in enumerate(self.src_images):
10628 # FIXME: pass debug option from opcode to backend
10629 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10630 constants.IEIO_FILE, (image, ),
10631 constants.IEIO_SCRIPT,
10632 (iobj.disks[idx], idx),
10634 transfers.append(dt)
10637 masterd.instance.TransferInstanceData(self, feedback_fn,
10638 self.op.src_node, pnode_name,
10639 self.pnode.secondary_ip,
10641 if not compat.all(import_result):
10642 self.LogWarning("Some disks for instance %s on node %s were not"
10643 " imported successfully" % (instance, pnode_name))
10645 rename_from = self._old_instance_name
10647 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10648 feedback_fn("* preparing remote import...")
10649 # The source cluster will stop the instance before attempting to make
10650 # a connection. In some cases stopping an instance can take a long
10651 # time, hence the shutdown timeout is added to the connection
10653 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10654 self.op.source_shutdown_timeout)
10655 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10657 assert iobj.primary_node == self.pnode.name
10659 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10660 self.source_x509_ca,
10661 self._cds, timeouts)
10662 if not compat.all(disk_results):
10663 # TODO: Should the instance still be started, even if some disks
10664 # failed to import (valid for local imports, too)?
10665 self.LogWarning("Some disks for instance %s on node %s were not"
10666 " imported successfully" % (instance, pnode_name))
10668 rename_from = self.source_instance_name
10671 # also checked in the prereq part
10672 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10675 # Run rename script on newly imported instance
10676 assert iobj.name == instance
10677 feedback_fn("Running rename script for %s" % instance)
10678 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10680 self.op.debug_level)
10681 if result.fail_msg:
10682 self.LogWarning("Failed to run rename script for %s on node"
10683 " %s: %s" % (instance, pnode_name, result.fail_msg))
10685 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10688 iobj.admin_state = constants.ADMINST_UP
10689 self.cfg.Update(iobj, feedback_fn)
10690 logging.info("Starting instance %s on node %s", instance, pnode_name)
10691 feedback_fn("* starting instance...")
10692 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10694 result.Raise("Could not start instance")
10696 return list(iobj.all_nodes)
10699 class LUInstanceMultiAlloc(NoHooksLU):
10700 """Allocates multiple instances at the same time.
10705 def CheckArguments(self):
10706 """Check arguments.
10710 for inst in self.op.instances:
10711 if inst.iallocator is not None:
10712 raise errors.OpPrereqError("iallocator is not allowed to be set on"
10713 " instance objects", errors.ECODE_INVAL)
10714 nodes.append(bool(inst.pnode))
10715 if inst.disk_template in constants.DTS_INT_MIRROR:
10716 nodes.append(bool(inst.snode))
10718 has_nodes = compat.any(nodes)
10719 if compat.all(nodes) ^ has_nodes:
10720 raise errors.OpPrereqError("There are instance objects providing"
10721 " pnode/snode while others do not",
10722 errors.ECODE_INVAL)
10724 if self.op.iallocator is None:
10725 default_iallocator = self.cfg.GetDefaultIAllocator()
10726 if default_iallocator and has_nodes:
10727 self.op.iallocator = default_iallocator
10729 raise errors.OpPrereqError("No iallocator or nodes on the instances"
10730 " given and no cluster-wide default"
10731 " iallocator found; please specify either"
10732 " an iallocator or nodes on the instances"
10733 " or set a cluster-wide default iallocator",
10734 errors.ECODE_INVAL)
10736 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10738 raise errors.OpPrereqError("There are duplicate instance names: %s" %
10739 utils.CommaJoin(dups), errors.ECODE_INVAL)
10741 def ExpandNames(self):
10742 """Calculate the locks.
10745 self.share_locks = _ShareAll()
10746 self.needed_locks = {}
10748 if self.op.iallocator:
10749 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10750 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10753 for inst in self.op.instances:
10754 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10755 nodeslist.append(inst.pnode)
10756 if inst.snode is not None:
10757 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10758 nodeslist.append(inst.snode)
10760 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10761 # Lock resources of instance's primary and secondary nodes (copy to
10762 # prevent accidental modification)
10763 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10765 def CheckPrereq(self):
10766 """Check prerequisite.
10769 cluster = self.cfg.GetClusterInfo()
10770 default_vg = self.cfg.GetVGName()
10771 ec_id = self.proc.GetECId()
10773 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10774 _ComputeNics(op, cluster, None,
10776 _ComputeFullBeParams(op, cluster))
10777 for op in self.op.instances]
10779 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10780 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10782 ial.Run(self.op.iallocator)
10784 if not ial.success:
10785 raise errors.OpPrereqError("Can't compute nodes using"
10786 " iallocator '%s': %s" %
10787 (self.op.iallocator, ial.info),
10788 errors.ECODE_NORES)
10790 self.ia_result = ial.result
10792 if self.op.dry_run:
10793 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
10794 constants.JOB_IDS_KEY: [],
10797 def _ConstructPartialResult(self):
10798 """Constructs the partial result.
10801 (allocatable, failed) = self.ia_result
10803 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10804 map(compat.fst, allocatable),
10805 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
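# Sketch of the returned structure (shape only, key constants abbreviated):
# with ia_result = ([("inst1", ["node1"])], ["inst2"]) this would yield roughly
# {ALLOCATABLE_KEY: ["inst1"], FAILED_KEY: ["inst2"]}, i.e. the names of the
# instances the iallocator could place and of those it could not.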
10808 def Exec(self, feedback_fn):
10809 """Executes the opcode.
10812 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10813 (allocatable, failed) = self.ia_result
10816 for (name, nodes) in allocatable:
10817 op = op2inst.pop(name)
10820 (op.pnode, op.snode) = nodes
10822 (op.pnode,) = nodes
10826 missing = set(op2inst.keys()) - set(failed)
10827 assert not missing, \
10828 "Iallocator returned an incomplete result: %s" % utils.CommaJoin(missing)
10830 return ResultWithJobs(jobs, **self._ConstructPartialResult())
10833 def _CheckRADOSFreeSpace():
10834 """Compute disk size requirements inside the RADOS cluster.
10837 # For the RADOS cluster we assume there is always enough space.
10841 class LUInstanceConsole(NoHooksLU):
10842 """Connect to an instance's console.
10844 This is somewhat special in that it returns the command line that
10845 you need to run on the master node in order to connect to the console.
10851 def ExpandNames(self):
10852 self.share_locks = _ShareAll()
10853 self._ExpandAndLockInstance()
10855 def CheckPrereq(self):
10856 """Check prerequisites.
10858 This checks that the instance is in the cluster.
10861 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10862 assert self.instance is not None, \
10863 "Cannot retrieve locked instance %s" % self.op.instance_name
10864 _CheckNodeOnline(self, self.instance.primary_node)
10866 def Exec(self, feedback_fn):
10867 """Connect to the console of an instance
10870 instance = self.instance
10871 node = instance.primary_node
10873 node_insts = self.rpc.call_instance_list([node],
10874 [instance.hypervisor])[node]
10875 node_insts.Raise("Can't get node information from %s" % node)
10877 if instance.name not in node_insts.payload:
10878 if instance.admin_state == constants.ADMINST_UP:
10879 state = constants.INSTST_ERRORDOWN
10880 elif instance.admin_state == constants.ADMINST_DOWN:
10881 state = constants.INSTST_ADMINDOWN
10883 state = constants.INSTST_ADMINOFFLINE
10884 raise errors.OpExecError("Instance %s is not running (state %s)" %
10885 (instance.name, state))
10887 logging.debug("Connecting to console of %s on %s", instance.name, node)
10889 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10892 def _GetInstanceConsole(cluster, instance):
10893 """Returns console information for an instance.
10895 @type cluster: L{objects.Cluster}
10896 @type instance: L{objects.Instance}
10900 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10901 # beparams and hvparams are passed separately, to avoid editing the
10902 # instance and then saving the defaults in the instance itself.
10903 hvparams = cluster.FillHV(instance)
10904 beparams = cluster.FillBE(instance)
10905 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10907 assert console.instance == instance.name
10908 assert console.Validate()
10910 return console.ToDict()
10913 class LUInstanceReplaceDisks(LogicalUnit):
10914 """Replace the disks of an instance.
10917 HPATH = "mirrors-replace"
10918 HTYPE = constants.HTYPE_INSTANCE
10921 def CheckArguments(self):
10922 """Check arguments.
10925 remote_node = self.op.remote_node
10926 ialloc = self.op.iallocator
10927 if self.op.mode == constants.REPLACE_DISK_CHG:
10928 if remote_node is None and ialloc is None:
10929 raise errors.OpPrereqError("When changing the secondary either an"
10930 " iallocator script must be used or the"
10931 " new node given", errors.ECODE_INVAL)
10933 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10935 elif remote_node is not None or ialloc is not None:
10936 # Not replacing the secondary
10937 raise errors.OpPrereqError("The iallocator and new node options can"
10938 " only be used when changing the"
10939 " secondary node", errors.ECODE_INVAL)
10941 def ExpandNames(self):
10942 self._ExpandAndLockInstance()
10944 assert locking.LEVEL_NODE not in self.needed_locks
10945 assert locking.LEVEL_NODE_RES not in self.needed_locks
10946 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10948 assert self.op.iallocator is None or self.op.remote_node is None, \
10949 "Conflicting options"
10951 if self.op.remote_node is not None:
10952 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10954 # Warning: do not remove the locking of the new secondary here
10955 # unless DRBD8.AddChildren is changed to work in parallel;
10956 # currently it doesn't since parallel invocations of
10957 # FindUnusedMinor will conflict
10958 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10959 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10961 self.needed_locks[locking.LEVEL_NODE] = []
10962 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10964 if self.op.iallocator is not None:
10965 # iallocator will select a new node in the same group
10966 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10968 self.needed_locks[locking.LEVEL_NODE_RES] = []
10970 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10971 self.op.iallocator, self.op.remote_node,
10972 self.op.disks, self.op.early_release,
10973 self.op.ignore_ipolicy)
10975 self.tasklets = [self.replacer]
10977 def DeclareLocks(self, level):
10978 if level == locking.LEVEL_NODEGROUP:
10979 assert self.op.remote_node is None
10980 assert self.op.iallocator is not None
10981 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10983 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10984 # Lock all groups used by instance optimistically; this requires going
10985 # via the node before it's locked, requiring verification later on
10986 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10987 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10989 elif level == locking.LEVEL_NODE:
10990 if self.op.iallocator is not None:
10991 assert self.op.remote_node is None
10992 assert not self.needed_locks[locking.LEVEL_NODE]
10994 # Lock member nodes of all locked groups
10995 self.needed_locks[locking.LEVEL_NODE] = \
10997 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10998 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11000 self._LockInstancesNodes()
11001 elif level == locking.LEVEL_NODE_RES:
11003 self.needed_locks[locking.LEVEL_NODE_RES] = \
11004 self.needed_locks[locking.LEVEL_NODE]
11006 def BuildHooksEnv(self):
11007 """Build hooks env.
11009 This runs on the master, the primary and all the secondaries.
11012 instance = self.replacer.instance
11014 "MODE": self.op.mode,
11015 "NEW_SECONDARY": self.op.remote_node,
11016 "OLD_SECONDARY": instance.secondary_nodes[0],
11018 env.update(_BuildInstanceHookEnvByObject(self, instance))
11021 def BuildHooksNodes(self):
11022 """Build hooks nodes.
11025 instance = self.replacer.instance
11027 self.cfg.GetMasterNode(),
11028 instance.primary_node,
11030 if self.op.remote_node is not None:
11031 nl.append(self.op.remote_node)
11034 def CheckPrereq(self):
11035 """Check prerequisites.
11038 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11039 self.op.iallocator is None)
11041 # Verify if node group locks are still correct
11042 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11044 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11046 return LogicalUnit.CheckPrereq(self)
11049 class TLReplaceDisks(Tasklet):
11050 """Replaces disks for an instance.
11052 Note: Locking is not within the scope of this class.
11055 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11056 disks, early_release, ignore_ipolicy):
11057 """Initializes this class.
11060 Tasklet.__init__(self, lu)
11063 self.instance_name = instance_name
11065 self.iallocator_name = iallocator_name
11066 self.remote_node = remote_node
11068 self.early_release = early_release
11069 self.ignore_ipolicy = ignore_ipolicy
11072 self.instance = None
11073 self.new_node = None
11074 self.target_node = None
11075 self.other_node = None
11076 self.remote_node_info = None
11077 self.node_secondary_ip = None
@staticmethod
11080 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11081 """Compute a new secondary node using an IAllocator.
11084 req = iallocator.IAReqRelocate(name=instance_name,
11085 relocate_from=list(relocate_from))
11086 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11088 ial.Run(iallocator_name)
11090 if not ial.success:
11091 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11092 " %s" % (iallocator_name, ial.info),
11093 errors.ECODE_NORES)
11095 remote_node_name = ial.result[0]
11097 lu.LogInfo("Selected new secondary for instance '%s': %s",
11098 instance_name, remote_node_name)
11100 return remote_node_name
11102 def _FindFaultyDisks(self, node_name):
11103 """Wrapper for L{_FindFaultyInstanceDisks}.
"""
11106 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
node_name, True)
11109 def _CheckDisksActivated(self, instance):
11110 """Checks if the instance disks are activated.
11112 @param instance: The instance to check disks
11113 @return: True if they are activated, False otherwise
11116 nodes = instance.all_nodes
11118 for idx, dev in enumerate(instance.disks):
for node in nodes:
11120 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11121 self.cfg.SetDiskID(dev, node)

11123 result = _BlockdevFind(self, node, dev, instance)

if result.offline:
continue
11127 elif result.fail_msg or not result.payload:
return False

return True
11132 def CheckPrereq(self):
11133 """Check prerequisites.
11135 This checks that the instance is in the cluster.
11138 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11139 assert instance is not None, \
11140 "Cannot retrieve locked instance %s" % self.instance_name
11142 if instance.disk_template != constants.DT_DRBD8:
11143 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11144 " instances", errors.ECODE_INVAL)
11146 if len(instance.secondary_nodes) != 1:
11147 raise errors.OpPrereqError("The instance has a strange layout,"
11148 " expected one secondary but found %d" %
11149 len(instance.secondary_nodes),
11150 errors.ECODE_FAULT)
11152 instance = self.instance
11153 secondary_node = instance.secondary_nodes[0]
11155 if self.iallocator_name is None:
11156 remote_node = self.remote_node
else:
11158 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11159 instance.name, instance.secondary_nodes)
11161 if remote_node is None:
11162 self.remote_node_info = None
else:
11164 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11165 "Remote node '%s' is not locked" % remote_node
11167 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11168 assert self.remote_node_info is not None, \
11169 "Cannot retrieve locked node %s" % remote_node
11171 if remote_node == self.instance.primary_node:
11172 raise errors.OpPrereqError("The specified node is the primary node of"
11173 " the instance", errors.ECODE_INVAL)
11175 if remote_node == secondary_node:
11176 raise errors.OpPrereqError("The specified node is already the"
11177 " secondary node of the instance",
11178 errors.ECODE_INVAL)
11180 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11181 constants.REPLACE_DISK_CHG):
11182 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11183 errors.ECODE_INVAL)
11185 if self.mode == constants.REPLACE_DISK_AUTO:
11186 if not self._CheckDisksActivated(instance):
11187 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11188 " first" % self.instance_name,
11189 errors.ECODE_STATE)
11190 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11191 faulty_secondary = self._FindFaultyDisks(secondary_node)
11193 if faulty_primary and faulty_secondary:
11194 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11195 " one node and can not be repaired"
11196 " automatically" % self.instance_name,
11197 errors.ECODE_STATE)
if faulty_primary:
11200 self.disks = faulty_primary
11201 self.target_node = instance.primary_node
11202 self.other_node = secondary_node
11203 check_nodes = [self.target_node, self.other_node]
11204 elif faulty_secondary:
11205 self.disks = faulty_secondary
11206 self.target_node = secondary_node
11207 self.other_node = instance.primary_node
11208 check_nodes = [self.target_node, self.other_node]
else:
self.disks = []
check_nodes = []

else:
11214 # Non-automatic modes
11215 if self.mode == constants.REPLACE_DISK_PRI:
11216 self.target_node = instance.primary_node
11217 self.other_node = secondary_node
11218 check_nodes = [self.target_node, self.other_node]
11220 elif self.mode == constants.REPLACE_DISK_SEC:
11221 self.target_node = secondary_node
11222 self.other_node = instance.primary_node
11223 check_nodes = [self.target_node, self.other_node]
11225 elif self.mode == constants.REPLACE_DISK_CHG:
11226 self.new_node = remote_node
11227 self.other_node = instance.primary_node
11228 self.target_node = secondary_node
11229 check_nodes = [self.new_node, self.other_node]
11231 _CheckNodeNotDrained(self.lu, remote_node)
11232 _CheckNodeVmCapable(self.lu, remote_node)
11234 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11235 assert old_node_info is not None
11236 if old_node_info.offline and not self.early_release:
11237 # doesn't make sense to delay the release
11238 self.early_release = True
11239 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11240 " early-release mode", secondary_node)
else:
11243 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
self.mode)
11246 # If not specified all disks should be replaced
if not self.disks:
11248 self.disks = range(len(self.instance.disks))
11250 # TODO: This is ugly, but right now we can't distinguish between internal
11251 # submitted opcode and external one. We should fix that.
11252 if self.remote_node_info:
11253 # We change the node, so let's verify it still meets the instance policy
11254 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11255 cluster = self.cfg.GetClusterInfo()
11256 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
new_group_info)
11258 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11259 ignore=self.ignore_ipolicy)
11261 for node in check_nodes:
11262 _CheckNodeOnline(self.lu, node)
11264 touched_nodes = frozenset(node_name for node_name in [self.new_node,
self.other_node,
self.target_node]
11267 if node_name is not None)
11269 # Release unneeded node and node resource locks
11270 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11271 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11273 # Release any owned node group
11274 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11275 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11277 # Check whether disks are valid
11278 for disk_idx in self.disks:
11279 instance.FindDisk(disk_idx)
11281 # Get secondary node IP addresses
11282 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11283 in self.cfg.GetMultiNodeInfo(touched_nodes))
11285 def Exec(self, feedback_fn):
11286 """Execute disk replacement.
11288 This dispatches the disk replacement to the appropriate handler.
11292 # Verify owned locks before starting operation
11293 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11294 assert set(owned_nodes) == set(self.node_secondary_ip), \
11295 ("Incorrect node locks, owning %s, expected %s" %
11296 (owned_nodes, self.node_secondary_ip.keys()))
11297 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11298 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11300 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11301 assert list(owned_instances) == [self.instance_name], \
11302 "Instance '%s' not locked" % self.instance_name
11304 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11305 "Should not own any node group lock at this point"
if not self.disks:
11308 feedback_fn("No disks need replacement for instance '%s'" %
11309 self.instance.name)
return

11312 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11313 (utils.CommaJoin(self.disks), self.instance.name))
11314 feedback_fn("Current primary node: %s", self.instance.primary_node)
11315 feedback_fn("Current secondary node: %s",
11316 utils.CommaJoin(self.instance.secondary_nodes))
11318 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11320 # Activate the instance disks if we're replacing them on a down instance
if activate_disks:
11322 _StartInstanceDisks(self.lu, self.instance, True)

try:
11325 # Should we replace the secondary node?
11326 if self.new_node is not None:
11327 fn = self._ExecDrbd8Secondary
else:
11329 fn = self._ExecDrbd8DiskOnly

11331 result = fn(feedback_fn)
finally:
11333 # Deactivate the instance disks if we're replacing them on a
# down instance
if activate_disks:
11336 _SafeShutdownInstanceDisks(self.lu, self.instance)
11338 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11341 # Verify owned locks
11342 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11343 nodes = frozenset(self.node_secondary_ip)
11344 assert ((self.early_release and not owned_nodes) or
11345 (not self.early_release and not (set(owned_nodes) - nodes))), \
11346 ("Not owning the correct locks, early_release=%s, owned=%r,"
11347 " nodes=%r" % (self.early_release, owned_nodes, nodes))

return result
11351 def _CheckVolumeGroup(self, nodes):
11352 self.lu.LogInfo("Checking volume groups")
11354 vgname = self.cfg.GetVGName()
11356 # Make sure volume group exists on all involved nodes
11357 results = self.rpc.call_vg_list(nodes)
if not results:
11359 raise errors.OpExecError("Can't list volume groups on the nodes")

for node in nodes:
11362 res = results[node]
11363 res.Raise("Error checking node %s" % node)
11364 if vgname not in res.payload:
11365 raise errors.OpExecError("Volume group '%s' not found on node %s" %
(vgname, node))
11368 def _CheckDisksExistence(self, nodes):
11369 # Check disk existence
11370 for idx, dev in enumerate(self.instance.disks):
11371 if idx not in self.disks:
continue

for node in nodes:
11375 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11376 self.cfg.SetDiskID(dev, node)
11378 result = _BlockdevFind(self, node, dev, self.instance)
11380 msg = result.fail_msg
11381 if msg or not result.payload:
if not msg:
11383 msg = "disk not found"
11384 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
(idx, node, msg))
11387 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11388 for idx, dev in enumerate(self.instance.disks):
11389 if idx not in self.disks:
continue

11392 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
(idx, node_name))
11395 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11396 on_primary, ldisk=ldisk):
11397 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11398 " replace disks for instance %s" %
11399 (node_name, self.instance.name))
11401 def _CreateNewStorage(self, node_name):
11402 """Create new storage on the primary or secondary node.
11404 This is only used for same-node replaces, not for changing the
11405 secondary node, hence we don't want to modify the existing disk.
"""
iv_names = {}

11410 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11411 for idx, dev in enumerate(disks):
11412 if idx not in self.disks:
continue

11415 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11417 self.cfg.SetDiskID(dev, node_name)
11419 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
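# For disk index 0, for example, this yields [".disk0_data", ".disk0_meta"];
# _GenerateUniqueNames below turns these suffixes into unique LV names.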
11420 names = _GenerateUniqueNames(self.lu, lv_names)
11422 (data_disk, meta_disk) = dev.children
11423 vg_data = data_disk.logical_id[0]
11424 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11425 logical_id=(vg_data, names[0]),
11426 params=data_disk.params)
11427 vg_meta = meta_disk.logical_id[0]
11428 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11429 size=constants.DRBD_META_SIZE,
11430 logical_id=(vg_meta, names[1]),
11431 params=meta_disk.params)
11433 new_lvs = [lv_data, lv_meta]
11434 old_lvs = [child.Copy() for child in dev.children]
11435 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11437 # we pass force_create=True to force the LVM creation
11438 for new_lv in new_lvs:
11439 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11440 _GetInstanceInfoText(self.instance), False)

return iv_names
11444 def _CheckDevices(self, node_name, iv_names):
11445 for name, (dev, _, _) in iv_names.iteritems():
11446 self.cfg.SetDiskID(dev, node_name)
11448 result = _BlockdevFind(self, node_name, dev, self.instance)
11450 msg = result.fail_msg
11451 if msg or not result.payload:
if not msg:
11453 msg = "disk not found"
11454 raise errors.OpExecError("Can't find DRBD device %s: %s" %
(name, msg))
11457 if result.payload.is_degraded:
11458 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11460 def _RemoveOldStorage(self, node_name, iv_names):
11461 for name, (_, old_lvs, _) in iv_names.iteritems():
11462 self.lu.LogInfo("Remove logical volumes for %s", name)
for lv in old_lvs:
11465 self.cfg.SetDiskID(lv, node_name)

11467 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
if msg:
11469 self.lu.LogWarning("Can't remove old LV: %s", msg,
11470 hint="remove unused LVs manually")
11472 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11473 """Replace a disk on the primary or secondary for DRBD 8.
11475 The algorithm for replace is quite complicated:
11477 1. for each disk to be replaced:
11479 1. create new LVs on the target node with unique names
11480 1. detach old LVs from the drbd device
11481 1. rename old LVs to name_replaced.<time_t>
11482 1. rename new LVs to old LVs
11483 1. attach the new LVs (with the old names now) to the drbd device
11485 1. wait for sync across all devices
11487 1. for each modified disk:
11489 1. remove old LVs (which have the name name_replaced.<time_t>)
11491 Failures are not very well handled.
"""
steps_total = 6

11496 # Step: check device activation
11497 self.lu.LogStep(1, steps_total, "Check device existence")
11498 self._CheckDisksExistence([self.other_node, self.target_node])
11499 self._CheckVolumeGroup([self.target_node, self.other_node])
11501 # Step: check other node consistency
11502 self.lu.LogStep(2, steps_total, "Check peer consistency")
11503 self._CheckDisksConsistency(self.other_node,
11504 self.other_node == self.instance.primary_node,
11507 # Step: create new storage
11508 self.lu.LogStep(3, steps_total, "Allocate new storage")
11509 iv_names = self._CreateNewStorage(self.target_node)
11511 # Step: for each lv, detach+rename*2+attach
11512 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11513 for dev, old_lvs, new_lvs in iv_names.itervalues():
11514 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11516 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
old_lvs)
11518 result.Raise("Can't detach drbd from local storage on node"
11519 " %s for device %s" % (self.target_node, dev.iv_name))
11521 #cfg.Update(instance)
11523 # ok, we created the new LVs, so now we know we have the needed
11524 # storage; as such, we proceed on the target node to rename
11525 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11526 # using the assumption that logical_id == physical_id (which in
11527 # turn is the unique_id on that node)
11529 # FIXME(iustin): use a better name for the replaced LVs
11530 temp_suffix = int(time.time())
11531 ren_fn = lambda d, suff: (d.physical_id[0],
11532 d.physical_id[1] + "_replaced-%s" % suff)
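# ren_fn maps an LV's (vg, lv_name) physical_id to (vg, lv_name + "_replaced-<time>"),
# e.g. ("xenvg", ".disk0_data") -> ("xenvg", ".disk0_data_replaced-1409836800");
# the volume group and timestamp shown here are purely illustrative.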
11534 # Build the rename list based on what LVs exist on the node
11535 rename_old_to_new = []
11536 for to_ren in old_lvs:
11537 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11538 if not result.fail_msg and result.payload:
11540 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11542 self.lu.LogInfo("Renaming the old LVs on the target node")
11543 result = self.rpc.call_blockdev_rename(self.target_node,
rename_old_to_new)
11545 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11547 # Now we rename the new LVs to the old LVs
11548 self.lu.LogInfo("Renaming the new LVs on the target node")
11549 rename_new_to_old = [(new, old.physical_id)
11550 for old, new in zip(old_lvs, new_lvs)]
11551 result = self.rpc.call_blockdev_rename(self.target_node,
rename_new_to_old)
11553 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11555 # Intermediate steps of in memory modifications
11556 for old, new in zip(old_lvs, new_lvs):
11557 new.logical_id = old.logical_id
11558 self.cfg.SetDiskID(new, self.target_node)
11560 # We need to modify old_lvs so that removal later removes the
11561 # right LVs, not the newly added ones; note that old_lvs is a
11563 for disk in old_lvs:
11564 disk.logical_id = ren_fn(disk, temp_suffix)
11565 self.cfg.SetDiskID(disk, self.target_node)
11567 # Now that the new lvs have the old name, we can add them to the device
11568 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
11569 result = self.rpc.call_blockdev_addchildren(self.target_node,
11570 (dev, self.instance), new_lvs)
11571 msg = result.fail_msg
if msg:
11573 for new_lv in new_lvs:
11574 msg2 = self.rpc.call_blockdev_remove(self.target_node,
new_lv).fail_msg
if msg2:
11577 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11578 hint=("cleanup manually the unused logical"
" volumes"))
11580 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11582 cstep = itertools.count(5)
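# Steps 1-4 are logged above; cstep hands out the remaining step numbers
# (5 and 6) in whichever order the early-release setting makes them run.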
11584 if self.early_release:
11585 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11586 self._RemoveOldStorage(self.target_node, iv_names)
11587 # TODO: Check if releasing locks early still makes sense
11588 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
else:
11590 # Release all resource locks except those used by the instance
11591 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11592 keep=self.node_secondary_ip.keys())
11594 # Release all node locks while waiting for sync
11595 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11597 # TODO: Can the instance lock be downgraded here? Take the optional disk
11598 # shutdown in the caller into consideration.
11601 # This can fail as the old devices are degraded and _WaitForSync
11602 # does a combined result over all disks, so we don't check its return value
11603 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11604 _WaitForSync(self.lu, self.instance)
11606 # Check all devices manually
11607 self._CheckDevices(self.instance.primary_node, iv_names)
11609 # Step: remove old storage
11610 if not self.early_release:
11611 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11612 self._RemoveOldStorage(self.target_node, iv_names)
11614 def _ExecDrbd8Secondary(self, feedback_fn):
11615 """Replace the secondary node for DRBD 8.
11617 The algorithm for replace is quite complicated:
11618 - for all disks of the instance:
11619 - create new LVs on the new node with same names
11620 - shutdown the drbd device on the old secondary
11621 - disconnect the drbd network on the primary
11622 - create the drbd device on the new secondary
11623 - network attach the drbd on the primary, using an artifice:
11624 the drbd code for Attach() will connect to the network if it
11625 finds a device which is connected to the good local disks but
11626 not network enabled
11627 - wait for sync across all devices
11628 - remove all disks from the old secondary
11630 Failures are not very well handled.
"""
steps_total = 6

11635 pnode = self.instance.primary_node
11637 # Step: check device activation
11638 self.lu.LogStep(1, steps_total, "Check device existence")
11639 self._CheckDisksExistence([self.instance.primary_node])
11640 self._CheckVolumeGroup([self.instance.primary_node])
11642 # Step: check other node consistency
11643 self.lu.LogStep(2, steps_total, "Check peer consistency")
11644 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11646 # Step: create new storage
11647 self.lu.LogStep(3, steps_total, "Allocate new storage")
11648 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11649 for idx, dev in enumerate(disks):
11650 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11651 (self.new_node, idx))
11652 # we pass force_create=True to force LVM creation
11653 for new_lv in dev.children:
11654 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11655 True, _GetInstanceInfoText(self.instance), False)
11657 # Step 4: drbd minors and drbd setup changes
11658 # after this, we must manually remove the drbd minors on both the
11659 # error and the success paths
11660 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11661 minors = self.cfg.AllocateDRBDMinor([self.new_node
11662 for dev in self.instance.disks],
11663 self.instance.name)
11664 logging.debug("Allocated minors %r", minors)
iv_names = {}
11667 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11668 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11669 (self.new_node, idx))
11670 # create new devices on new_node; note that we create two IDs:
11671 # one without port, so the drbd will be activated without
11672 # networking information on the new node at this stage, and one
11673 # with network, for the latter activation in step 4
11674 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11675 if self.instance.primary_node == o_node1:
p_minor = o_minor1
else:
11678 assert self.instance.primary_node == o_node2, "Three-node instance?"
p_minor = o_minor2
11681 new_alone_id = (self.instance.primary_node, self.new_node, None,
11682 p_minor, new_minor, o_secret)
11683 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11684 p_minor, new_minor, o_secret)
11686 iv_names[idx] = (dev, dev.children, new_net_id)
11687 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
new_net_id)
11689 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11690 logical_id=new_alone_id,
11691 children=dev.children,
size=dev.size,
params={})
11694 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
self.cfg)
try:
11697 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
anno_new_drbd,
11699 _GetInstanceInfoText(self.instance), False)
11700 except errors.GenericError:
11701 self.cfg.ReleaseDRBDMinors(self.instance.name)
raise
11704 # We have new devices, shutdown the drbd on the old secondary
11705 for idx, dev in enumerate(self.instance.disks):
11706 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
11707 self.cfg.SetDiskID(dev, self.target_node)
11708 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11709 (dev, self.instance)).fail_msg
if msg:
11711 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11712 " node: %s" % (idx, msg),
11713 hint=("Please cleanup this device manually as"
11714 " soon as possible"))
11716 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11717 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11718 self.instance.disks)[pnode]
11720 msg = result.fail_msg
if msg:
11722 # detaches didn't succeed (unlikely)
11723 self.cfg.ReleaseDRBDMinors(self.instance.name)
11724 raise errors.OpExecError("Can't detach the disks from the network on"
11725 " old node: %s" % (msg,))
11727 # if we managed to detach at least one, we update all the disks of
11728 # the instance to point to the new secondary
11729 self.lu.LogInfo("Updating instance configuration")
11730 for dev, _, new_logical_id in iv_names.itervalues():
11731 dev.logical_id = new_logical_id
11732 self.cfg.SetDiskID(dev, self.instance.primary_node)
11734 self.cfg.Update(self.instance, feedback_fn)
11736 # Release all node locks (the configuration has been updated)
11737 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11739 # and now perform the drbd attach
11740 self.lu.LogInfo("Attaching primary drbds to new secondary"
11741 " (standalone => connected)")
11742 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
self.new_node],
11744 self.node_secondary_ip,
11745 (self.instance.disks, self.instance),
11746 self.instance.name,
False)
11748 for to_node, to_result in result.items():
11749 msg = to_result.fail_msg
if msg:
11751 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
to_node, msg,
11753 hint=("please do a gnt-instance info to see the"
11754 " status of disks"))
11756 cstep = itertools.count(5)
11758 if self.early_release:
11759 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11760 self._RemoveOldStorage(self.target_node, iv_names)
11761 # TODO: Check if releasing locks early still makes sense
11762 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
else:
11764 # Release all resource locks except those used by the instance
11765 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11766 keep=self.node_secondary_ip.keys())
11768 # TODO: Can the instance lock be downgraded here? Take the optional disk
11769 # shutdown in the caller into consideration.
11772 # This can fail as the old devices are degraded and _WaitForSync
11773 # does a combined result over all disks, so we don't check its return value
11774 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11775 _WaitForSync(self.lu, self.instance)
11777 # Check all devices manually
11778 self._CheckDevices(self.instance.primary_node, iv_names)
11780 # Step: remove old storage
11781 if not self.early_release:
11782 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11783 self._RemoveOldStorage(self.target_node, iv_names)
11786 class LURepairNodeStorage(NoHooksLU):
11787 """Repairs the volume group on a node.
11792 def CheckArguments(self):
11793 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11795 storage_type = self.op.storage_type
11797 if (constants.SO_FIX_CONSISTENCY not in
11798 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11799 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11800 " repaired" % storage_type,
11801 errors.ECODE_INVAL)
11803 def ExpandNames(self):
11804 self.needed_locks = {
11805 locking.LEVEL_NODE: [self.op.node_name],
}
11808 def _CheckFaultyDisks(self, instance, node_name):
11809 """Ensure faulty disks abort the opcode or at least warn."""
try:
11811 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
node_name, True):
11813 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11814 " node '%s'" % (instance.name, node_name),
11815 errors.ECODE_STATE)
11816 except errors.OpPrereqError, err:
11817 if self.op.ignore_consistency:
11818 self.LogWarning(str(err.args[0]))
else:
raise
11822 def CheckPrereq(self):
11823 """Check prerequisites.
11826 # Check whether any instance on this node has faulty disks
11827 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11828 if inst.admin_state != constants.ADMINST_UP:
continue

11830 check_nodes = set(inst.all_nodes)
11831 check_nodes.discard(self.op.node_name)
11832 for inst_node_name in check_nodes:
11833 self._CheckFaultyDisks(inst, inst_node_name)
11835 def Exec(self, feedback_fn):
11836 feedback_fn("Repairing storage unit '%s' on %s ..." %
11837 (self.op.name, self.op.node_name))
11839 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11840 result = self.rpc.call_storage_execute(self.op.node_name,
11841 self.op.storage_type, st_args,
11843 constants.SO_FIX_CONSISTENCY)
11844 result.Raise("Failed to repair storage unit '%s' on %s" %
11845 (self.op.name, self.op.node_name))
11848 class LUNodeEvacuate(NoHooksLU):
11849 """Evacuates instances off a list of nodes.
11854 _MODE2IALLOCATOR = {
11855 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11856 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11857 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11859 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11860 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11861 constants.IALLOCATOR_NEVAC_MODES)
11863 def CheckArguments(self):
11864 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11866 def ExpandNames(self):
11867 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11869 if self.op.remote_node is not None:
11870 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11871 assert self.op.remote_node
11873 if self.op.remote_node == self.op.node_name:
11874 raise errors.OpPrereqError("Can not use evacuated node as a new"
11875 " secondary node", errors.ECODE_INVAL)
11877 if self.op.mode != constants.NODE_EVAC_SEC:
11878 raise errors.OpPrereqError("Without the use of an iallocator only"
11879 " secondary instances can be evacuated",
11880 errors.ECODE_INVAL)
11883 self.share_locks = _ShareAll()
11884 self.needed_locks = {
11885 locking.LEVEL_INSTANCE: [],
11886 locking.LEVEL_NODEGROUP: [],
11887 locking.LEVEL_NODE: [],
}
11890 # Determine nodes (via group) optimistically, needs verification once locks
11891 # have been acquired
11892 self.lock_nodes = self._DetermineNodes()
11894 def _DetermineNodes(self):
11895 """Gets the list of nodes to operate on.
11898 if self.op.remote_node is None:
11899 # Iallocator will choose any node(s) in the same group
11900 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
else:
11902 group_nodes = frozenset([self.op.remote_node])
11904 # Determine nodes to be locked
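# e.g. evacuating "node3" via an iallocator locks node3 plus every other
# member of node3's node group(s); with an explicit remote node only the
# two nodes involved are locked (node names here are illustrative).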
11905 return set([self.op.node_name]) | group_nodes
11907 def _DetermineInstances(self):
11908 """Builds list of instances to operate on.
11911 assert self.op.mode in constants.NODE_EVAC_MODES
11913 if self.op.mode == constants.NODE_EVAC_PRI:
11914 # Primary instances only
11915 inst_fn = _GetNodePrimaryInstances
11916 assert self.op.remote_node is None, \
11917 "Evacuating primary instances requires iallocator"
11918 elif self.op.mode == constants.NODE_EVAC_SEC:
11919 # Secondary instances only
11920 inst_fn = _GetNodeSecondaryInstances
else:
# All instances
11923 assert self.op.mode == constants.NODE_EVAC_ALL
11924 inst_fn = _GetNodeInstances
11925 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11927 raise errors.OpPrereqError("Due to an issue with the iallocator"
11928 " interface it is not possible to evacuate"
11929 " all instances at once; specify explicitly"
11930 " whether to evacuate primary or secondary"
11932 errors.ECODE_INVAL)
11934 return inst_fn(self.cfg, self.op.node_name)
11936 def DeclareLocks(self, level):
11937 if level == locking.LEVEL_INSTANCE:
11938 # Lock instances optimistically, needs verification once node and group
11939 # locks have been acquired
11940 self.needed_locks[locking.LEVEL_INSTANCE] = \
11941 set(i.name for i in self._DetermineInstances())
11943 elif level == locking.LEVEL_NODEGROUP:
11944 # Lock node groups for all potential target nodes optimistically, needs
11945 # verification once nodes have been acquired
11946 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11947 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11949 elif level == locking.LEVEL_NODE:
11950 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11952 def CheckPrereq(self):
11954 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11955 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11956 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11958 need_nodes = self._DetermineNodes()
11960 if not owned_nodes.issuperset(need_nodes):
11961 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11962 " locks were acquired, current nodes"
11963 " are '%s', used to be '%s'; retry the"
" operation" %
11965 (self.op.node_name,
11966 utils.CommaJoin(need_nodes),
11967 utils.CommaJoin(owned_nodes)),
11968 errors.ECODE_STATE)
11970 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11971 if owned_groups != wanted_groups:
11972 raise errors.OpExecError("Node groups changed since locks were acquired,"
11973 " current groups are '%s', used to be '%s';"
11974 " retry the operation" %
11975 (utils.CommaJoin(wanted_groups),
11976 utils.CommaJoin(owned_groups)))
11978 # Determine affected instances
11979 self.instances = self._DetermineInstances()
11980 self.instance_names = [i.name for i in self.instances]
11982 if set(self.instance_names) != owned_instances:
11983 raise errors.OpExecError("Instances on node '%s' changed since locks"
11984 " were acquired, current instances are '%s',"
11985 " used to be '%s'; retry the operation" %
11986 (self.op.node_name,
11987 utils.CommaJoin(self.instance_names),
11988 utils.CommaJoin(owned_instances)))
11990 if self.instance_names:
11991 self.LogInfo("Evacuating instances from node '%s': %s",
self.op.node_name,
11993 utils.CommaJoin(utils.NiceSort(self.instance_names)))
else:
11995 self.LogInfo("No instances to evacuate from node '%s'",
self.op.node_name)
11998 if self.op.remote_node is not None:
11999 for i in self.instances:
12000 if i.primary_node == self.op.remote_node:
12001 raise errors.OpPrereqError("Node %s is the primary node of"
12002 " instance %s, cannot use it as"
12004 (self.op.remote_node, i.name),
12005 errors.ECODE_INVAL)
12007 def Exec(self, feedback_fn):
12008 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12010 if not self.instance_names:
12011 # No instances to evacuate
jobs = []

12014 elif self.op.iallocator is not None:
12015 # TODO: Implement relocation to other group
12016 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12017 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12018 instances=list(self.instance_names))
12019 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12021 ial.Run(self.op.iallocator)
12023 if not ial.success:
12024 raise errors.OpPrereqError("Can't compute node evacuation using"
12025 " iallocator '%s': %s" %
12026 (self.op.iallocator, ial.info),
12027 errors.ECODE_NORES)
12029 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12031 elif self.op.remote_node is not None:
12032 assert self.op.mode == constants.NODE_EVAC_SEC
jobs = [
12034 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12035 remote_node=self.op.remote_node,
disks=[],
12037 mode=constants.REPLACE_DISK_CHG,
12038 early_release=self.op.early_release)]
12039 for instance_name in self.instance_names]
else:
12042 raise errors.ProgrammerError("No iallocator or remote node")
12044 return ResultWithJobs(jobs)
12047 def _SetOpEarlyRelease(early_release, op):
12048 """Sets C{early_release} flag on opcodes if available.
"""
try:
12052 op.early_release = early_release
12053 except AttributeError:
12054 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

return op
12059 def _NodeEvacDest(use_nodes, group, nodes):
12060 """Returns group or nodes depending on caller's choice.
"""
if use_nodes:
12064 return utils.CommaJoin(nodes)
else:
return group
12069 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12070 """Unpacks the result of change-group and node-evacuate iallocator requests.
12072 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12073 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12075 @type lu: L{LogicalUnit}
12076 @param lu: Logical unit instance
12077 @type alloc_result: tuple/list
12078 @param alloc_result: Result from iallocator
12079 @type early_release: bool
12080 @param early_release: Whether to release locks early if possible
12081 @type use_nodes: bool
12082 @param use_nodes: Whether to display node names instead of groups
12085 (moved, failed, jobs) = alloc_result
if failed:
12088 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12089 for (name, reason) in failed)
12090 lu.LogWarning("Unable to evacuate instances %s", failreason)
12091 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
if moved:
12094 lu.LogInfo("Instances to be moved: %s",
12095 utils.CommaJoin("%s (to %s)" %
12096 (name, _NodeEvacDest(use_nodes, group, nodes))
12097 for (name, group, nodes) in moved))
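# Each entry in the iallocator's job list is itself a list of serialized
# opcodes; below they are deserialized and, where supported, get the
# early_release flag applied, yielding one job (list of opcodes) per entry.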
12099 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12100 map(opcodes.OpCode.LoadOpCode, ops))
for ops in jobs]
12104 def _DiskSizeInBytesToMebibytes(lu, size):
12105 """Converts a disk size in bytes to mebibytes.
12107 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12110 (mib, remainder) = divmod(size, 1024 * 1024)
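# e.g. size = 1073741825 (1 GiB plus one byte) gives mib = 1024 and
# remainder = 1, so the result below is rounded up to 1025 MiB.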
if remainder != 0:
12113 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12114 " to not overwrite existing data (%s bytes will not be"
12115 " wiped)", (1024 * 1024) - remainder)
mib += 1

return mib
12121 class LUInstanceGrowDisk(LogicalUnit):
12122 """Grow a disk of an instance.
12125 HPATH = "disk-grow"
12126 HTYPE = constants.HTYPE_INSTANCE
12129 def ExpandNames(self):
12130 self._ExpandAndLockInstance()
12131 self.needed_locks[locking.LEVEL_NODE] = []
12132 self.needed_locks[locking.LEVEL_NODE_RES] = []
12133 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12134 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12136 def DeclareLocks(self, level):
12137 if level == locking.LEVEL_NODE:
12138 self._LockInstancesNodes()
12139 elif level == locking.LEVEL_NODE_RES:
12141 self.needed_locks[locking.LEVEL_NODE_RES] = \
12142 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12144 def BuildHooksEnv(self):
12145 """Build hooks env.
12147 This runs on the master, the primary and all the secondaries.
"""
env = {
12151 "DISK": self.op.disk,
12152 "AMOUNT": self.op.amount,
12153 "ABSOLUTE": self.op.absolute,
}
12155 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
return env
12158 def BuildHooksNodes(self):
12159 """Build hooks nodes.
"""
12162 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return nl, nl
12165 def CheckPrereq(self):
12166 """Check prerequisites.
12168 This checks that the instance is in the cluster.
12171 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12172 assert instance is not None, \
12173 "Cannot retrieve locked instance %s" % self.op.instance_name
12174 nodenames = list(instance.all_nodes)
12175 for node in nodenames:
12176 _CheckNodeOnline(self, node)
12178 self.instance = instance
12180 if instance.disk_template not in constants.DTS_GROWABLE:
12181 raise errors.OpPrereqError("Instance's disk layout does not support"
12182 " growing", errors.ECODE_INVAL)
12184 self.disk = instance.FindDisk(self.op.disk)
12186 if self.op.absolute:
12187 self.target = self.op.amount
12188 self.delta = self.target - self.disk.size
if self.delta < 0:
12190 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12191 "current disk size (%s)" %
12192 (utils.FormatUnit(self.target, "h"),
12193 utils.FormatUnit(self.disk.size, "h")),
12194 errors.ECODE_STATE)
else:
12196 self.delta = self.op.amount
12197 self.target = self.disk.size + self.delta
if self.delta < 0:
12199 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12200 utils.FormatUnit(self.delta, "h"),
12201 errors.ECODE_INVAL)
12203 if instance.disk_template not in (constants.DT_FILE,
12204 constants.DT_SHARED_FILE,
12206 # TODO: check the free disk space for file, when that feature will be
12208 _CheckNodesFreeDiskPerVG(self, nodenames,
12209 self.disk.ComputeGrowth(self.delta))
12211 def Exec(self, feedback_fn):
12212 """Execute disk grow.
12215 instance = self.instance
disk = self.disk
12218 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12219 assert (self.owned_locks(locking.LEVEL_NODE) ==
12220 self.owned_locks(locking.LEVEL_NODE_RES))
12222 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12224 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
if not disks_ok:
12226 raise errors.OpExecError("Cannot activate block device to grow")
12228 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12229 (self.op.disk, instance.name,
12230 utils.FormatUnit(self.delta, "h"),
12231 utils.FormatUnit(self.target, "h")))
12233 # First run all grow ops in dry-run mode
12234 for node in instance.all_nodes:
12235 self.cfg.SetDiskID(disk, node)
12236 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12238 result.Raise("Dry-run grow request failed to node %s" % node)
if wipe_disks:
12241 # Get disk size from primary node for wiping
12242 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12243 result.Raise("Failed to retrieve disk size from node '%s'" %
12244 instance.primary_node)
12246 (disk_size_in_bytes, ) = result.payload
12248 if disk_size_in_bytes is None:
12249 raise errors.OpExecError("Failed to retrieve disk size from primary"
12250 " node '%s'" % instance.primary_node)
12252 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12254 assert old_disk_size >= disk.size, \
12255 ("Retrieved disk size too small (got %s, should be at least %s)" %
12256 (old_disk_size, disk.size))
else:
12258 old_disk_size = None
12260 # We know that (as far as we can test) operations across different
12261 # nodes will succeed, time to run it for real on the backing storage
12262 for node in instance.all_nodes:
12263 self.cfg.SetDiskID(disk, node)
12264 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12266 result.Raise("Grow request failed to node %s" % node)
12268 # And now execute it for logical storage, on the primary node
12269 node = instance.primary_node
12270 self.cfg.SetDiskID(disk, node)
12271 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12273 result.Raise("Grow request failed to node %s" % node)
12275 disk.RecordGrow(self.delta)
12276 self.cfg.Update(instance, feedback_fn)
12278 # Changes have been recorded, release node lock
12279 _ReleaseLocks(self, locking.LEVEL_NODE)
12281 # Downgrade lock while waiting for sync
12282 self.glm.downgrade(locking.LEVEL_INSTANCE)
12284 assert wipe_disks ^ (old_disk_size is None)
if wipe_disks:
12287 assert instance.disks[self.op.disk] == disk
12289 # Wipe newly added disk space
12290 _WipeDisks(self, instance,
12291 disks=[(self.op.disk, disk, old_disk_size)])
12293 if self.op.wait_for_sync:
12294 disk_abort = not _WaitForSync(self, instance, disks=[disk])
if disk_abort:
12296 self.LogWarning("Disk syncing has not returned a good status; check"
" the instance")
12298 if instance.admin_state != constants.ADMINST_UP:
12299 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12300 elif instance.admin_state != constants.ADMINST_UP:
12301 self.LogWarning("Not shutting down the disk even if the instance is"
12302 " not supposed to be running because no wait for"
12303 " sync mode was requested")
12305 assert self.owned_locks(locking.LEVEL_NODE_RES)
12306 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12309 class LUInstanceQueryData(NoHooksLU):
12310 """Query runtime instance data.
12315 def ExpandNames(self):
12316 self.needed_locks = {}
12318 # Use locking if requested or when non-static information is wanted
12319 if not (self.op.static or self.op.use_locking):
12320 self.LogWarning("Non-static data requested, locks need to be acquired")
12321 self.op.use_locking = True
12323 if self.op.instances or not self.op.use_locking:
12324 # Expand instance names right here
12325 self.wanted_names = _GetWantedInstances(self, self.op.instances)
else:
12327 # Will use acquired locks
12328 self.wanted_names = None
12330 if self.op.use_locking:
12331 self.share_locks = _ShareAll()
12333 if self.wanted_names is None:
12334 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
else:
12336 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12338 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12339 self.needed_locks[locking.LEVEL_NODE] = []
12340 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12342 def DeclareLocks(self, level):
12343 if self.op.use_locking:
12344 if level == locking.LEVEL_NODEGROUP:
12345 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12347 # Lock all groups used by instances optimistically; this requires going
12348 # via the node before it's locked, requiring verification later on
12349 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12350 frozenset(group_uuid
12351 for instance_name in owned_instances
for group_uuid in
12353 self.cfg.GetInstanceNodeGroups(instance_name))
12355 elif level == locking.LEVEL_NODE:
12356 self._LockInstancesNodes()
12358 def CheckPrereq(self):
12359 """Check prerequisites.
12361 This only checks the optional instance list against the existing names.
12364 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12365 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12366 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12368 if self.wanted_names is None:
12369 assert self.op.use_locking, "Locking was not used"
12370 self.wanted_names = owned_instances
12372 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12374 if self.op.use_locking:
12375 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
None)
else:
12378 assert not (owned_instances or owned_groups or owned_nodes)
12380 self.wanted_instances = instances.values()
12382 def _ComputeBlockdevStatus(self, node, instance, dev):
12383 """Returns the status of a block device
12386 if self.op.static or not node:
return None

12389 self.cfg.SetDiskID(dev, node)

12391 result = self.rpc.call_blockdev_find(node, dev)
if result.offline:
return None
12395 result.Raise("Can't compute disk status for %s" % instance.name)

12397 status = result.payload
if status is None:
return None

12401 return (status.dev_path, status.major, status.minor,
12402 status.sync_percent, status.estimated_time,
12403 status.is_degraded, status.ldisk_status)
12405 def _ComputeDiskStatus(self, instance, snode, dev):
12406 """Compute block device status.
12409 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12411 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12413 def _ComputeDiskStatusInner(self, instance, snode, dev):
12414 """Compute block device status.
12416 @attention: The device has to be annotated already.
12419 if dev.dev_type in constants.LDS_DRBD:
12420 # we change the snode then (otherwise we use the one passed in)
12421 if dev.logical_id[0] == instance.primary_node:
12422 snode = dev.logical_id[1]
else:
12424 snode = dev.logical_id[0]
12426 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
instance, dev)
12428 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12431 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
instance, snode),
dev.children)

return {
12438 "iv_name": dev.iv_name,
12439 "dev_type": dev.dev_type,
12440 "logical_id": dev.logical_id,
12441 "physical_id": dev.physical_id,
12442 "pstatus": dev_pstatus,
12443 "sstatus": dev_sstatus,
12444 "children": dev_children,
}
12449 def Exec(self, feedback_fn):
12450 """Gather and return data"""
result = {}

12453 cluster = self.cfg.GetClusterInfo()
12455 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12456 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12458 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12459 for node in nodes.values()))
12461 group2name_fn = lambda uuid: groups[uuid].name
12463 for instance in self.wanted_instances:
12464 pnode = nodes[instance.primary_node]
12466 if self.op.static or pnode.offline:
12467 remote_state = None
if pnode.offline:
12469 self.LogWarning("Primary node %s is marked offline, returning static"
12470 " information only for instance %s" %
12471 (pnode.name, instance.name))
else:
12473 remote_info = self.rpc.call_instance_info(instance.primary_node,
instance.name,
12475 instance.hypervisor)
12476 remote_info.Raise("Error checking node %s" % instance.primary_node)
12477 remote_info = remote_info.payload
12478 if remote_info and "state" in remote_info:
12479 remote_state = "up"
else:
12481 if instance.admin_state == constants.ADMINST_UP:
12482 remote_state = "down"
else:
12484 remote_state = instance.admin_state
12486 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
instance.disks)
12489 snodes_group_uuids = [nodes[snode_name].group
12490 for snode_name in instance.secondary_nodes]
12492 result[instance.name] = {
12493 "name": instance.name,
12494 "config_state": instance.admin_state,
12495 "run_state": remote_state,
12496 "pnode": instance.primary_node,
12497 "pnode_group_uuid": pnode.group,
12498 "pnode_group_name": group2name_fn(pnode.group),
12499 "snodes": instance.secondary_nodes,
12500 "snodes_group_uuids": snodes_group_uuids,
12501 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12503 # this happens to be the same format used for hooks
12504 "nics": _NICListToTuple(self, instance.nics),
12505 "disk_template": instance.disk_template,
"disks": disks,
12507 "hypervisor": instance.hypervisor,
12508 "network_port": instance.network_port,
12509 "hv_instance": instance.hvparams,
12510 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12511 "be_instance": instance.beparams,
12512 "be_actual": cluster.FillBE(instance),
12513 "os_instance": instance.osparams,
12514 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12515 "serial_no": instance.serial_no,
12516 "mtime": instance.mtime,
12517 "ctime": instance.ctime,
12518 "uuid": instance.uuid,
}

return result
12524 def PrepareContainerMods(mods, private_fn):
12525 """Prepares a list of container modifications by adding a private data field.
12527 @type mods: list of tuples; (operation, index, parameters)
12528 @param mods: List of modifications
12529 @type private_fn: callable or None
12530 @param private_fn: Callable for constructing a private data field for a
modification

"""
12535 if private_fn is None:
fn = lambda: None
else:
fn = private_fn

12540 return [(op, idx, params, fn()) for (op, idx, params) in mods]
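# e.g. PrepareContainerMods([(constants.DDM_ADD, -1, params)], None) returns
# [(constants.DDM_ADD, -1, params, None)]; with a private_fn, the last element
# is a freshly constructed private object, one per modification.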
12543 #: Type description for changes as returned by L{ApplyContainerMods}'s
12545 _TApplyContModsCbChanges = \
12546 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12547 ht.TNonEmptyString,
ht.TAny,
])))
12552 def ApplyContainerMods(kind, container, chgdesc, mods,
12553 create_fn, modify_fn, remove_fn):
12554 """Applies descriptions in C{mods} to C{container}.
12557 @param kind: One-word item description
12558 @type container: list
12559 @param container: Container to modify
12560 @type chgdesc: None or list
12561 @param chgdesc: List of applied changes
12563 @param mods: Modifications as returned by L{PrepareContainerMods}
12564 @type create_fn: callable
12565 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12566 receives absolute item index, parameters and private data object as added
12567 by L{PrepareContainerMods}, returns tuple containing new item and changes
12569 @type modify_fn: callable
12570 @param modify_fn: Callback for modifying an existing item
12571 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12572 and private data object as added by L{PrepareContainerMods}, returns
12574 @type remove_fn: callable
12575 @param remove_fn: Callback on removing item; receives absolute item index,
12576 item and private data object as added by L{PrepareContainerMods}
"""
12579 for (op, idx, params, private) in mods:
if idx == -1:
12582 absidx = len(container) - 1
elif idx < 0:
12584 raise IndexError("Not accepting negative indices other than -1")
12585 elif idx > len(container):
12586 raise IndexError("Got %s index %s, but there are only %s" %
12587 (kind, idx, len(container)))
else:
absidx = idx

changes = None

12593 if op == constants.DDM_ADD:
12594 # Calculate where item will be added
if idx == -1:
12596 addidx = len(container)
else:
addidx = idx

12600 if create_fn is None:
item = params
else:
12603 (item, changes) = create_fn(addidx, params, private)
if idx == -1:
12606 container.append(item)
else:
12609 assert idx <= len(container)
12610 # list.insert does so before the specified index
12611 container.insert(idx, item)
else:
12613 # Retrieve existing item
try:
12615 item = container[absidx]
except IndexError:
12617 raise IndexError("Invalid %s index %s" % (kind, idx))
12619 if op == constants.DDM_REMOVE:
assert not params

12622 if remove_fn is not None:
12623 remove_fn(absidx, item, private)
12625 changes = [("%s/%s" % (kind, absidx), "remove")]
12627 assert container[absidx] == item
12628 del container[absidx]
12629 elif op == constants.DDM_MODIFY:
12630 if modify_fn is not None:
12631 changes = modify_fn(absidx, item, params, private)
else:
12633 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12635 assert _TApplyContModsCbChanges(changes)
12637 if not (chgdesc is None or changes is None):
12638 chgdesc.extend(changes)
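# Usage sketch (callback names here are illustrative only): LUInstanceSetParams
# below prepares and applies NIC/disk modifications roughly as
#   mods = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
#   ApplyContainerMods("NIC", nics, changes, mods, create_cb, modify_cb, remove_cb)
# where create_cb/modify_cb/remove_cb stand in for the real callbacks.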
12641 def _UpdateIvNames(base_index, disks):
12642 """Updates the C{iv_name} attribute of disks.
12644 @type disks: list of L{objects.Disk}
"""
12647 for (idx, disk) in enumerate(disks):
12648 disk.iv_name = "disk/%s" % (base_index + idx, )
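# e.g. _UpdateIvNames(2, disks) renames the given disks to "disk/2", "disk/3", ...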
12651 class _InstNicModPrivate:
12652 """Data structure for network interface modifications.
12654 Used by L{LUInstanceSetParams}.
12657 def __init__(self):
self.params = None
self.filled = None
12662 class LUInstanceSetParams(LogicalUnit):
12663 """Modifies an instance's parameters.
12666 HPATH = "instance-modify"
12667 HTYPE = constants.HTYPE_INSTANCE
@staticmethod
12671 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12672 assert ht.TList(mods)
12673 assert not mods or len(mods[0]) in (2, 3)
12675 if mods and len(mods[0]) == 2:
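# Old-style modifications are 2-tuples (operation, parameters); they are
# upgraded here to the 3-tuple form (operation, index, parameters), e.g.
# (constants.DDM_ADD, params) becomes (constants.DDM_ADD, -1, params).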
result = []
addremove = 0
12679 for op, params in mods:
12680 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12681 result.append((op, -1, params))
addremove += 1

if addremove > 1:
12685 raise errors.OpPrereqError("Only one %s add or remove operation is"
12686 " supported at a time" % kind,
12687 errors.ECODE_INVAL)
else:
12689 result.append((constants.DDM_MODIFY, op, params))

12691 assert verify_fn(result)
else:
result = mods

return result
@staticmethod
12698 def _CheckMods(kind, mods, key_types, item_fn):
12699 """Ensures requested disk/NIC modifications are valid.
12702 for (op, _, params) in mods:
12703 assert ht.TDict(params)
12705 utils.ForceDictType(params, key_types)
12707 if op == constants.DDM_REMOVE:
if params:
12709 raise errors.OpPrereqError("No settings should be passed when"
12710 " removing a %s" % kind,
12711 errors.ECODE_INVAL)
12712 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12713 item_fn(op, params)
12715 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
@staticmethod
12718 def _VerifyDiskModification(op, params):
12719 """Verifies a disk modification.
12722 if op == constants.DDM_ADD:
12723 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12724 if mode not in constants.DISK_ACCESS_SET:
12725 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12726 errors.ECODE_INVAL)
12728 size = params.get(constants.IDISK_SIZE, None)
if size is None:
12730 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12731 constants.IDISK_SIZE, errors.ECODE_INVAL)
try:
size = int(size)
12735 except (TypeError, ValueError), err:
12736 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12737 errors.ECODE_INVAL)
12739 params[constants.IDISK_SIZE] = size
12741 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12742 raise errors.OpPrereqError("Disk size change not possible, use"
12743 " grow-disk", errors.ECODE_INVAL)
@staticmethod
12746 def _VerifyNicModification(op, params):
12747 """Verifies a network interface modification.
12750 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12751 ip = params.get(constants.INIC_IP, None)
12752 req_net = params.get(constants.INIC_NETWORK, None)
12753 link = params.get(constants.NIC_LINK, None)
12754 mode = params.get(constants.NIC_MODE, None)
12755 if req_net is not None:
12756 if req_net.lower() == constants.VALUE_NONE:
12757 params[constants.INIC_NETWORK] = None
12759 elif link is not None or mode is not None:
12760 raise errors.OpPrereqError("If a network is given,"
12761 " mode or link should not be passed",
12762 errors.ECODE_INVAL)
12764 if op == constants.DDM_ADD:
12765 macaddr = params.get(constants.INIC_MAC, None)
12766 if macaddr is None:
12767 params[constants.INIC_MAC] = constants.VALUE_AUTO
if ip is not None:
12770 if ip.lower() == constants.VALUE_NONE:
12771 params[constants.INIC_IP] = None
else:
12773 if ip.lower() == constants.NIC_IP_POOL:
12774 if op == constants.DDM_ADD and req_net is None:
12775 raise errors.OpPrereqError("If ip=pool, parameter network"
12777 errors.ECODE_INVAL)
else:
12779 if not netutils.IPAddress.IsValid(ip):
12780 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12781 errors.ECODE_INVAL)
12783 if constants.INIC_MAC in params:
12784 macaddr = params[constants.INIC_MAC]
12785 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12786 macaddr = utils.NormalizeAndValidateMac(macaddr)
12788 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12789 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12790 " modifying an existing NIC",
12791 errors.ECODE_INVAL)
12793 def CheckArguments(self):
12794 if not (self.op.nics or self.op.disks or self.op.disk_template or
12795 self.op.hvparams or self.op.beparams or self.op.os_name or
12796 self.op.offline is not None or self.op.runtime_mem):
12797 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12799 if self.op.hvparams:
12800 _CheckGlobalHvParams(self.op.hvparams)
12802 self.op.disks = self._UpgradeDiskNicMods(
12803 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12804 self.op.nics = self._UpgradeDiskNicMods(
12805 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12807 # Check disk modifications
12808 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12809 self._VerifyDiskModification)
12811 if self.op.disks and self.op.disk_template is not None:
12812 raise errors.OpPrereqError("Disk template conversion and other disk"
12813 " changes not supported at the same time",
12814 errors.ECODE_INVAL)
12816 if (self.op.disk_template and
12817 self.op.disk_template in constants.DTS_INT_MIRROR and
12818 self.op.remote_node is None):
12819 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12820 " one requires specifying a secondary node",
12821 errors.ECODE_INVAL)
12823 # Check NIC modifications
12824 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12825 self._VerifyNicModification)
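# Illustrative sketch (added; names and values hypothetical): after
# _UpgradeDiskNicMods, both self.op.disks and self.op.nics are lists of
# (operation, identifier, params) tuples, e.g. an opcode asking for one new
# disk and one NIC change could carry:
#
#   disks=[(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024})]
#   nics=[(constants.DDM_MODIFY, 0, {constants.INIC_LINK: "br1"})]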
12827 def ExpandNames(self):
12828 self._ExpandAndLockInstance()
12829 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12830 # Can't even acquire node locks in shared mode as upcoming changes in
12831 # Ganeti 2.6 will start to modify the node object on disk conversion
12832 self.needed_locks[locking.LEVEL_NODE] = []
12833 self.needed_locks[locking.LEVEL_NODE_RES] = []
12834 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12835 # Look node group to look up the ipolicy
12836 self.share_locks[locking.LEVEL_NODEGROUP] = 1
12838 def DeclareLocks(self, level):
12839 if level == locking.LEVEL_NODEGROUP:
12840 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12841 # Acquire locks for the instance's nodegroups optimistically. Needs
12842 # to be verified in CheckPrereq
12843 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12844 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12845 elif level == locking.LEVEL_NODE:
12846 self._LockInstancesNodes()
12847 if self.op.disk_template and self.op.remote_node:
12848 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12849 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12850 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12852 self.needed_locks[locking.LEVEL_NODE_RES] = \
12853 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12855 def BuildHooksEnv(self):
12856 """Build hooks env.
12858 This runs on the master, primary and secondaries.
12860 """
12861 args = {}
12862 if constants.BE_MINMEM in self.be_new:
12863 args["minmem"] = self.be_new[constants.BE_MINMEM]
12864 if constants.BE_MAXMEM in self.be_new:
12865 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12866 if constants.BE_VCPUS in self.be_new:
12867 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12868 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12869 # information at all.
12871 if self._new_nics is not None:
12872 nics = []
12874 for nic in self._new_nics:
12875 n = copy.deepcopy(nic)
12876 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12877 n.nicparams = nicparams
12878 nics.append(_NICToTuple(self, n))
12880 args["nics"] = nics
12882 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12883 if self.op.disk_template:
12884 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12885 if self.op.runtime_mem:
12886 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12890 def BuildHooksNodes(self):
12891 """Build hooks nodes.
12894 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12897 def _PrepareNicModification(self, params, private, old_ip, old_net,
12898 old_params, cluster, pnode):
12900 update_params_dict = dict([(key, params[key])
12901 for key in constants.NICS_PARAMETERS
12902 if key in params])
12904 req_link = update_params_dict.get(constants.NIC_LINK, None)
12905 req_mode = update_params_dict.get(constants.NIC_MODE, None)
12907 new_net = params.get(constants.INIC_NETWORK, old_net)
12908 if new_net is not None:
12909 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
12910 if netparams is None:
12911 raise errors.OpPrereqError("No netparams found for the network"
12912 " %s, probably not connected" % new_net,
12913 errors.ECODE_INVAL)
12914 new_params = dict(netparams)
12915 else:
12916 new_params = _GetUpdatedParams(old_params, update_params_dict)
12918 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12920 new_filled_params = cluster.SimpleFillNIC(new_params)
12921 objects.NIC.CheckParameterSyntax(new_filled_params)
12923 new_mode = new_filled_params[constants.NIC_MODE]
12924 if new_mode == constants.NIC_MODE_BRIDGED:
12925 bridge = new_filled_params[constants.NIC_LINK]
12926 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12927 if msg:
12928 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12929 if self.op.force:
12930 self.warn.append(msg)
12931 else:
12932 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12934 elif new_mode == constants.NIC_MODE_ROUTED:
12935 ip = params.get(constants.INIC_IP, old_ip)
12936 if ip is None:
12937 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12938 " on a routed NIC", errors.ECODE_INVAL)
12940 if constants.INIC_MAC in params:
12941 mac = params[constants.INIC_MAC]
12942 if mac is None:
12943 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12944 errors.ECODE_INVAL)
12945 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12946 # otherwise generate the MAC address
12947 params[constants.INIC_MAC] = \
12948 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12949 else:
12950 # or validate/reserve the current one
12951 try:
12952 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12953 except errors.ReservationError:
12954 raise errors.OpPrereqError("MAC address '%s' already in use"
12955 " in cluster" % mac,
12956 errors.ECODE_NOTUNIQUE)
12957 elif new_net != old_net:
12959 def get_net_prefix(net):
12961 uuid = self.cfg.LookupNetwork(net)
12963 nobj = self.cfg.GetNetwork(uuid)
12964 return nobj.mac_prefix
12967 new_prefix = get_net_prefix(new_net)
12968 old_prefix = get_net_prefix(old_net)
12969 if old_prefix != new_prefix:
12970 params[constants.INIC_MAC] = \
12971 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12973 #if there is a change in nic-network configuration
12974 new_ip = params.get(constants.INIC_IP, old_ip)
12975 if (new_ip, new_net) != (old_ip, old_net):
12978 if new_ip.lower() == constants.NIC_IP_POOL:
12979 try:
12980 new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
12981 except errors.ReservationError:
12982 raise errors.OpPrereqError("Unable to get a free IP"
12983 " from the address pool",
12984 errors.ECODE_STATE)
12985 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
12986 params[constants.INIC_IP] = new_ip
12987 elif new_ip != old_ip or new_net != old_net:
12988 try:
12989 self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
12990 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
12991 except errors.ReservationError:
12992 raise errors.OpPrereqError("IP %s not available in network %s" %
12994 errors.ECODE_NOTUNIQUE)
12995 elif new_ip.lower() == constants.NIC_IP_POOL:
12996 raise errors.OpPrereqError("ip=pool, but no network found",
12997 errors.ECODE_INVAL)
13000 if self.op.conflicts_check:
13001 _CheckForConflictingIp(self, new_ip, pnode)
13005 try:
13006 self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
13007 except errors.AddressPoolError:
13008 logging.warning("Release IP %s not contained in network %s",
13009 old_ip, old_net)
13011 # there are no changes in (net, ip) tuple
13012 elif (old_net is not None and
13013 (req_link is not None or req_mode is not None)):
13014 raise errors.OpPrereqError("Not allowed to change link or mode of"
13015 " a NIC that is connected to a network",
13016 errors.ECODE_INVAL)
13018 private.params = new_params
13019 private.filled = new_filled_params
13021 def CheckPrereq(self):
13022 """Check prerequisites.
13024 This only checks the instance list against the existing names.
13027 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13028 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13030 cluster = self.cluster = self.cfg.GetClusterInfo()
13031 assert self.instance is not None, \
13032 "Cannot retrieve locked instance %s" % self.op.instance_name
13034 pnode = instance.primary_node
13035 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13036 nodelist = list(instance.all_nodes)
13037 pnode_info = self.cfg.GetNodeInfo(pnode)
13038 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13040 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13041 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13042 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13044 # dictionary with instance information after the modification
13045 ispec = {}
13047 # Prepare disk/NIC modifications
13048 self.diskmod = PrepareContainerMods(self.op.disks, None)
13049 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13052 if self.op.os_name and not self.op.force:
13053 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13054 self.op.force_variant)
13055 instance_os = self.op.os_name
13056 else:
13057 instance_os = instance.os
13059 assert not (self.op.disk_template and self.op.disks), \
13060 "Can't modify disk template and apply disk changes at the same time"
13062 if self.op.disk_template:
13063 if instance.disk_template == self.op.disk_template:
13064 raise errors.OpPrereqError("Instance already has disk template %s" %
13065 instance.disk_template, errors.ECODE_INVAL)
13067 if (instance.disk_template,
13068 self.op.disk_template) not in self._DISK_CONVERSIONS:
13069 raise errors.OpPrereqError("Unsupported disk template conversion from"
13070 " %s to %s" % (instance.disk_template,
13071 self.op.disk_template),
13072 errors.ECODE_INVAL)
13073 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13074 msg="cannot change disk template")
13075 if self.op.disk_template in constants.DTS_INT_MIRROR:
13076 if self.op.remote_node == pnode:
13077 raise errors.OpPrereqError("Given new secondary node %s is the same"
13078 " as the primary node of the instance" %
13079 self.op.remote_node, errors.ECODE_STATE)
13080 _CheckNodeOnline(self, self.op.remote_node)
13081 _CheckNodeNotDrained(self, self.op.remote_node)
13082 # FIXME: here we assume that the old instance type is DT_PLAIN
13083 assert instance.disk_template == constants.DT_PLAIN
13084 disks = [{constants.IDISK_SIZE: d.size,
13085 constants.IDISK_VG: d.logical_id[0]}
13086 for d in instance.disks]
13087 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13088 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13090 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13091 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13092 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13093 snode_group)
13094 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13095 ignore=self.op.ignore_ipolicy)
13096 if pnode_info.group != snode_info.group:
13097 self.LogWarning("The primary and secondary nodes are in two"
13098 " different node groups; the disk parameters"
13099 " from the first disk's node group will be"
13102 # hvparams processing
13103 if self.op.hvparams:
13104 hv_type = instance.hypervisor
13105 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13106 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13107 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13110 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
13111 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13112 self.hv_proposed = self.hv_new = hv_new # the new actual values
13113 self.hv_inst = i_hvdict # the new dict (without defaults)
13114 else:
13115 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13116 instance.hvparams)
13117 self.hv_new = self.hv_inst = {}
13119 # beparams processing
13120 if self.op.beparams:
13121 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13122 use_none=True)
13123 objects.UpgradeBeParams(i_bedict)
13124 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13125 be_new = cluster.SimpleFillBE(i_bedict)
13126 self.be_proposed = self.be_new = be_new # the new actual values
13127 self.be_inst = i_bedict # the new dict (without defaults)
13128 else:
13129 self.be_new = self.be_inst = {}
13130 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13131 be_old = cluster.FillBE(instance)
13133 # CPU param validation -- checking every time a parameter is
13134 # changed to cover all cases where either CPU mask or vcpus have
13135 # changed
13136 if (constants.BE_VCPUS in self.be_proposed and
13137 constants.HV_CPU_MASK in self.hv_proposed):
13138 cpu_list = \
13139 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13140 # Verify mask is consistent with number of vCPUs. Can skip this
13141 # test if only 1 entry in the CPU mask, which means same mask
13142 # is applied to all vCPUs.
13143 if (len(cpu_list) > 1 and
13144 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13145 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13147 (self.be_proposed[constants.BE_VCPUS],
13148 self.hv_proposed[constants.HV_CPU_MASK]),
13149 errors.ECODE_INVAL)
13151 # Only perform this test if a new CPU mask is given
13152 if constants.HV_CPU_MASK in self.hv_new:
13153 # Calculate the largest CPU number requested
13154 max_requested_cpu = max(map(max, cpu_list))
13155 # Check that all of the instance's nodes have enough physical CPUs to
13156 # satisfy the requested CPU mask
13157 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13158 max_requested_cpu + 1, instance.hypervisor)
13160 # osparams processing
13161 if self.op.osparams:
13162 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13163 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13164 self.os_inst = i_osdict # the new dict (without defaults)
13165 else:
13166 self.os_inst = {}
13168 self.warn = []
13170 #TODO(dynmem): do the appropriate check involving MINMEM
13171 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13172 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13173 mem_check_list = [pnode]
13174 if be_new[constants.BE_AUTO_BALANCE]:
13175 # either we changed auto_balance to yes or it was from before
13176 mem_check_list.extend(instance.secondary_nodes)
13177 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13178 instance.hypervisor)
13179 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13180 [instance.hypervisor])
13181 pninfo = nodeinfo[pnode]
13182 msg = pninfo.fail_msg
13183 if msg:
13184 # Assume the primary node is unreachable and go ahead
13185 self.warn.append("Can't get info from primary node %s: %s" %
13186 (pnode, msg))
13187 else:
13188 (_, _, (pnhvinfo, )) = pninfo.payload
13189 if not isinstance(pnhvinfo.get("memory_free", None), int):
13190 self.warn.append("Node data from primary node %s doesn't contain"
13191 " free memory information" % pnode)
13192 elif instance_info.fail_msg:
13193 self.warn.append("Can't get instance runtime information: %s" %
13194 instance_info.fail_msg)
13196 if instance_info.payload:
13197 current_mem = int(instance_info.payload["memory"])
13198 else:
13199 # Assume instance not running
13200 # (there is a slight race condition here, but it's not very
13201 # probable, and we have no other way to check)
13202 # TODO: Describe race condition
13203 current_mem = 0
13204 #TODO(dynmem): do the appropriate check involving MINMEM
13205 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13206 pnhvinfo["memory_free"])
13207 if miss_mem > 0:
13208 raise errors.OpPrereqError("This change will prevent the instance"
13209 " from starting, due to %d MB of memory"
13210 " missing on its primary node" %
13211 miss_mem, errors.ECODE_NORES)
13213 if be_new[constants.BE_AUTO_BALANCE]:
13214 for node, nres in nodeinfo.items():
13215 if node not in instance.secondary_nodes:
13216 continue
13217 nres.Raise("Can't get info from secondary node %s" % node,
13218 prereq=True, ecode=errors.ECODE_STATE)
13219 (_, _, (nhvinfo, )) = nres.payload
13220 if not isinstance(nhvinfo.get("memory_free", None), int):
13221 raise errors.OpPrereqError("Secondary node %s didn't return free"
13222 " memory information" % node,
13223 errors.ECODE_STATE)
13224 #TODO(dynmem): do the appropriate check involving MINMEM
13225 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13226 raise errors.OpPrereqError("This change will prevent the instance"
13227 " from failover to its secondary node"
13228 " %s, due to not enough memory" % node,
13229 errors.ECODE_STATE)
13231 if self.op.runtime_mem:
13232 remote_info = self.rpc.call_instance_info(instance.primary_node,
13233 instance.name,
13234 instance.hypervisor)
13235 remote_info.Raise("Error checking node %s" % instance.primary_node)
13236 if not remote_info.payload: # not running already
13237 raise errors.OpPrereqError("Instance %s is not running" %
13238 instance.name, errors.ECODE_STATE)
13240 current_memory = remote_info.payload["memory"]
13241 if (not self.op.force and
13242 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13243 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13244 raise errors.OpPrereqError("Instance %s must have memory between %d"
13245 " and %d MB of memory unless --force is"
13248 self.be_proposed[constants.BE_MINMEM],
13249 self.be_proposed[constants.BE_MAXMEM]),
13250 errors.ECODE_INVAL)
13252 delta = self.op.runtime_mem - current_memory
13253 if delta > 0:
13254 _CheckNodeFreeMemory(self, instance.primary_node,
13255 "ballooning memory for instance %s" %
13256 instance.name, delta, instance.hypervisor)
13258 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13259 raise errors.OpPrereqError("Disk operations not supported for"
13260 " diskless instances", errors.ECODE_INVAL)
13262 def _PrepareNicCreate(_, params, private):
13263 self._PrepareNicModification(params, private, None, None,
13264 {}, cluster, pnode)
13265 return (None, None)
13267 def _PrepareNicMod(_, nic, params, private):
13268 self._PrepareNicModification(params, private, nic.ip, nic.network,
13269 nic.nicparams, cluster, pnode)
13270 return None
13272 def _PrepareNicRemove(_, params, __):
13273 ip = params.ip
13274 net = params.network
13275 if net is not None and ip is not None:
13276 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13278 # Verify NIC changes (operating on copy)
13279 nics = instance.nics[:]
13280 ApplyContainerMods("NIC", nics, None, self.nicmod,
13281 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13282 if len(nics) > constants.MAX_NICS:
13283 raise errors.OpPrereqError("Instance has too many network interfaces"
13284 " (%d), cannot add more" % constants.MAX_NICS,
13285 errors.ECODE_STATE)
13287 # Verify disk changes (operating on a copy)
13288 disks = instance.disks[:]
13289 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13290 if len(disks) > constants.MAX_DISKS:
13291 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13292 " more" % constants.MAX_DISKS,
13293 errors.ECODE_STATE)
13294 disk_sizes = [disk.size for disk in instance.disks]
13295 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13296 self.diskmod if op == constants.DDM_ADD)
13297 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13298 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13300 if self.op.offline is not None:
13301 if self.op.offline:
13302 msg = "can't change to offline"
13304 msg = "can't change to online"
13305 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13307 # Pre-compute NIC changes (necessary to use result in hooks)
13308 self._nic_chgdesc = []
13309 if self.nicmod:
13310 # Operate on copies as this is still in prereq
13311 nics = [nic.Copy() for nic in instance.nics]
13312 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13313 self._CreateNewNic, self._ApplyNicMods, None)
13314 self._new_nics = nics
13315 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13316 else:
13317 self._new_nics = None
13318 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13320 if not self.op.ignore_ipolicy:
13321 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13322 group_info)
13324 # Fill ispec with backend parameters
13325 ispec[constants.ISPEC_SPINDLE_USE] = \
13326 self.be_new.get(constants.BE_SPINDLE_USE, None)
13327 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13328 None)
13330 # Copy ispec to verify parameters with min/max values separately
13331 ispec_max = ispec.copy()
13332 ispec_max[constants.ISPEC_MEM_SIZE] = \
13333 self.be_new.get(constants.BE_MAXMEM, None)
13334 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13335 ispec_min = ispec.copy()
13336 ispec_min[constants.ISPEC_MEM_SIZE] = \
13337 self.be_new.get(constants.BE_MINMEM, None)
13338 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13340 if (res_max or res_min):
13341 # FIXME: Improve error message by including information about whether
13342 # the upper or lower limit of the parameter fails the ipolicy.
13343 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13344 (group_info, group_info.name,
13345 utils.CommaJoin(set(res_max + res_min))))
13346 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
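# Illustrative sketch (added, hypothetical values): the ispec dict assembled
# above describes the instance after the requested changes, e.g.
#
#   {constants.ISPEC_MEM_SIZE: 1024, constants.ISPEC_CPU_COUNT: 2,
#    constants.ISPEC_DISK_COUNT: 1, constants.ISPEC_DISK_SIZE: [10240],
#    constants.ISPEC_NIC_COUNT: 1, constants.ISPEC_SPINDLE_USE: 1}
#
# _ComputeIPolicyInstanceSpecViolation returns a list of violation messages;
# res_max and res_min both being empty means the proposed maximum and minimum
# memory settings fit the group's instance policy.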
13348 def _ConvertPlainToDrbd(self, feedback_fn):
13349 """Converts an instance from plain to drbd.
13352 feedback_fn("Converting template to drbd")
13353 instance = self.instance
13354 pnode = instance.primary_node
13355 snode = self.op.remote_node
13357 assert instance.disk_template == constants.DT_PLAIN
13359 # create a fake disk info for _GenerateDiskTemplate
13360 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13361 constants.IDISK_VG: d.logical_id[0]}
13362 for d in instance.disks]
13363 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13364 instance.name, pnode, [snode],
13365 disk_info, None, None, 0, feedback_fn,
13366 self.diskparams)
13367 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13368 self.diskparams)
13369 info = _GetInstanceInfoText(instance)
13370 feedback_fn("Creating additional volumes...")
13371 # first, create the missing data and meta devices
13372 for disk in anno_disks:
13373 # unfortunately this is... not too nice
13374 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13375 info, True)
13376 for child in disk.children:
13377 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13378 # at this stage, all new LVs have been created, we can rename the
13380 feedback_fn("Renaming original volumes...")
13381 rename_list = [(o, n.children[0].logical_id)
13382 for (o, n) in zip(instance.disks, new_disks)]
13383 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13384 result.Raise("Failed to rename original LVs")
13386 feedback_fn("Initializing DRBD devices...")
13387 # all child devices are in place, we can now create the DRBD devices
13388 for disk in anno_disks:
13389 for node in [pnode, snode]:
13390 f_create = node == pnode
13391 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13393 # at this point, the instance has been modified
13394 instance.disk_template = constants.DT_DRBD8
13395 instance.disks = new_disks
13396 self.cfg.Update(instance, feedback_fn)
13398 # Release node locks while waiting for sync
13399 _ReleaseLocks(self, locking.LEVEL_NODE)
13401 # disks are created, waiting for sync
13402 disk_abort = not _WaitForSync(self, instance,
13403 oneshot=not self.op.wait_for_sync)
13404 if disk_abort:
13405 raise errors.OpExecError("There are some degraded disks for"
13406 " this instance, please cleanup manually")
13408 # Node resource locks will be released by caller
13410 def _ConvertDrbdToPlain(self, feedback_fn):
13411 """Converts an instance from drbd to plain.
13414 instance = self.instance
13416 assert len(instance.secondary_nodes) == 1
13417 assert instance.disk_template == constants.DT_DRBD8
13419 pnode = instance.primary_node
13420 snode = instance.secondary_nodes[0]
13421 feedback_fn("Converting template to plain")
13423 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13424 new_disks = [d.children[0] for d in instance.disks]
13426 # copy over size and mode
13427 for parent, child in zip(old_disks, new_disks):
13428 child.size = parent.size
13429 child.mode = parent.mode
13431 # this is a DRBD disk, return its port to the pool
13432 # NOTE: this must be done right before the call to cfg.Update!
13433 for disk in old_disks:
13434 tcp_port = disk.logical_id[2]
13435 self.cfg.AddTcpUdpPort(tcp_port)
13437 # update instance structure
13438 instance.disks = new_disks
13439 instance.disk_template = constants.DT_PLAIN
13440 self.cfg.Update(instance, feedback_fn)
13442 # Release locks in case removing disks takes a while
13443 _ReleaseLocks(self, locking.LEVEL_NODE)
13445 feedback_fn("Removing volumes on the secondary node...")
13446 for disk in old_disks:
13447 self.cfg.SetDiskID(disk, snode)
13448 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13449 if msg:
13450 self.LogWarning("Could not remove block device %s on node %s,"
13451 " continuing anyway: %s", disk.iv_name, snode, msg)
13453 feedback_fn("Removing unneeded volumes on the primary node...")
13454 for idx, disk in enumerate(old_disks):
13455 meta = disk.children[1]
13456 self.cfg.SetDiskID(meta, pnode)
13457 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13458 if msg:
13459 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13460 " continuing anyway: %s", idx, pnode, msg)
13462 def _CreateNewDisk(self, idx, params, _):
13463 """Creates a new disk.
13466 instance = self.instance
13469 if instance.disk_template in constants.DTS_FILEBASED:
13470 (file_driver, file_path) = instance.disks[0].logical_id
13471 file_path = os.path.dirname(file_path)
13472 else:
13473 file_driver = file_path = None
13475 disk = \
13476 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13477 instance.primary_node, instance.secondary_nodes,
13478 [params], file_path, file_driver, idx,
13479 self.Log, self.diskparams)[0]
13481 info = _GetInstanceInfoText(instance)
13483 logging.info("Creating volume %s for instance %s",
13484 disk.iv_name, instance.name)
13485 # Note: this needs to be kept in sync with _CreateDisks
13487 for node in instance.all_nodes:
13488 f_create = (node == instance.primary_node)
13489 try:
13490 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13491 except errors.OpExecError, err:
13492 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13493 disk.iv_name, disk, node, err)
13495 return (disk, [
13496 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13497 ])
13499 @staticmethod
13500 def _ModifyDisk(idx, disk, params, _):
13501 """Modifies a disk.
13504 disk.mode = params[constants.IDISK_MODE]
13507 ("disk.mode/%d" % idx, disk.mode),
13510 def _RemoveDisk(self, idx, root, _):
13514 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13515 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13516 self.cfg.SetDiskID(disk, node)
13517 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13518 if msg:
13519 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13520 " continuing anyway", idx, node, msg)
13522 # if this is a DRBD disk, return its port to the pool
13523 if root.dev_type in constants.LDS_DRBD:
13524 self.cfg.AddTcpUdpPort(root.logical_id[2])
13527 def _CreateNewNic(idx, params, private):
13528 """Creates data structure for a new network interface.
13531 mac = params[constants.INIC_MAC]
13532 ip = params.get(constants.INIC_IP, None)
13533 net = params.get(constants.INIC_NETWORK, None)
13534 #TODO: not private.filled?? can a nic have no nicparams??
13535 nicparams = private.filled
13537 return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
13538 ("nic.%d" % idx,
13539 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13540 (mac, ip, private.filled[constants.NIC_MODE],
13541 private.filled[constants.NIC_LINK],
13542 net)),
13543 ])
13545 @staticmethod
13546 def _ApplyNicMods(idx, nic, params, private):
13547 """Modifies a network interface.
13552 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13554 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13555 setattr(nic, key, params[key])
13557 if private.filled:
13558 nic.nicparams = private.filled
13560 for (key, val) in nic.nicparams.items():
13561 changes.append(("nic.%s/%d" % (key, idx), val))
13565 def Exec(self, feedback_fn):
13566 """Modifies an instance.
13568 All parameters take effect only at the next restart of the instance.
13571 # Process here the warnings from CheckPrereq, as we don't have a
13572 # feedback_fn there.
13573 # TODO: Replace with self.LogWarning
13574 for warn in self.warn:
13575 feedback_fn("WARNING: %s" % warn)
13577 assert ((self.op.disk_template is None) ^
13578 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13579 "Not owning any node resource locks"
13582 instance = self.instance
13585 if self.op.runtime_mem:
13586 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13588 self.op.runtime_mem)
13589 rpcres.Raise("Cannot modify instance runtime memory")
13590 result.append(("runtime_memory", self.op.runtime_mem))
13592 # Apply disk changes
13593 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13594 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13595 _UpdateIvNames(0, instance.disks)
13597 if self.op.disk_template:
13599 check_nodes = set(instance.all_nodes)
13600 if self.op.remote_node:
13601 check_nodes.add(self.op.remote_node)
13602 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13603 owned = self.owned_locks(level)
13604 assert not (check_nodes - owned), \
13605 ("Not owning the correct locks, owning %r, expected at least %r" %
13606 (owned, check_nodes))
13608 r_shut = _ShutdownInstanceDisks(self, instance)
13609 if not r_shut:
13610 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13611 " proceed with disk template conversion")
13612 mode = (instance.disk_template, self.op.disk_template)
13613 try:
13614 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13615 except:
13616 self.cfg.ReleaseDRBDMinors(instance.name)
13617 raise
13618 result.append(("disk_template", self.op.disk_template))
13620 assert instance.disk_template == self.op.disk_template, \
13621 ("Expected disk template '%s', found '%s'" %
13622 (self.op.disk_template, instance.disk_template))
13624 # Release node and resource locks if there are any (they might already have
13625 # been released during disk conversion)
13626 _ReleaseLocks(self, locking.LEVEL_NODE)
13627 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13629 # Apply NIC changes
13630 if self._new_nics is not None:
13631 instance.nics = self._new_nics
13632 result.extend(self._nic_chgdesc)
13635 if self.op.hvparams:
13636 instance.hvparams = self.hv_inst
13637 for key, val in self.op.hvparams.iteritems():
13638 result.append(("hv/%s" % key, val))
13641 if self.op.beparams:
13642 instance.beparams = self.be_inst
13643 for key, val in self.op.beparams.iteritems():
13644 result.append(("be/%s" % key, val))
13647 if self.op.os_name:
13648 instance.os = self.op.os_name
13651 if self.op.osparams:
13652 instance.osparams = self.os_inst
13653 for key, val in self.op.osparams.iteritems():
13654 result.append(("os/%s" % key, val))
13656 if self.op.offline is None:
13657 # Ignore
13658 pass
13659 elif self.op.offline:
13660 # Mark instance as offline
13661 self.cfg.MarkInstanceOffline(instance.name)
13662 result.append(("admin_state", constants.ADMINST_OFFLINE))
13664 # Mark instance as online, but stopped
13665 self.cfg.MarkInstanceDown(instance.name)
13666 result.append(("admin_state", constants.ADMINST_DOWN))
13668 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13670 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13671 self.owned_locks(locking.LEVEL_NODE)), \
13672 "All node locks should have been released by now"
13676 _DISK_CONVERSIONS = {
13677 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13678 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13679 }
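# _DISK_CONVERSIONS maps (current template, requested template) pairs to the
# conversion helpers above; Exec() looks the pair up and calls the helper.
# A sketch of the matching CLI requests (hypothetical names):
#
#   gnt-instance modify -t drbd -n node2.example.com instance1.example.com
#   gnt-instance modify -t plain instance1.example.com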
13682 class LUInstanceChangeGroup(LogicalUnit):
13683 HPATH = "instance-change-group"
13684 HTYPE = constants.HTYPE_INSTANCE
13685 REQ_BGL = False
13687 def ExpandNames(self):
13688 self.share_locks = _ShareAll()
13689 self.needed_locks = {
13690 locking.LEVEL_NODEGROUP: [],
13691 locking.LEVEL_NODE: [],
13692 }
13694 self._ExpandAndLockInstance()
13696 if self.op.target_groups:
13697 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13698 self.op.target_groups)
13699 else:
13700 self.req_target_uuids = None
13702 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13704 def DeclareLocks(self, level):
13705 if level == locking.LEVEL_NODEGROUP:
13706 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13708 if self.req_target_uuids:
13709 lock_groups = set(self.req_target_uuids)
13711 # Lock all groups used by instance optimistically; this requires going
13712 # via the node before it's locked, requiring verification later on
13713 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13714 lock_groups.update(instance_groups)
13715 else:
13716 # No target groups, need to lock all of them
13717 lock_groups = locking.ALL_SET
13719 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13721 elif level == locking.LEVEL_NODE:
13722 if self.req_target_uuids:
13723 # Lock all nodes used by instances
13724 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13725 self._LockInstancesNodes()
13727 # Lock all nodes in all potential target groups
13728 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13729 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13730 member_nodes = [node_name
13731 for group in lock_groups
13732 for node_name in self.cfg.GetNodeGroup(group).members]
13733 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13734 else:
13735 # Lock all nodes as all groups are potential targets
13736 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13738 def CheckPrereq(self):
13739 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13740 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13741 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13743 assert (self.req_target_uuids is None or
13744 owned_groups.issuperset(self.req_target_uuids))
13745 assert owned_instances == set([self.op.instance_name])
13747 # Get instance information
13748 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13750 # Check if node groups for locked instance are still correct
13751 assert owned_nodes.issuperset(self.instance.all_nodes), \
13752 ("Instance %s's nodes changed while we kept the lock" %
13753 self.op.instance_name)
13755 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13756 owned_groups)
13758 if self.req_target_uuids:
13759 # User requested specific target groups
13760 self.target_uuids = frozenset(self.req_target_uuids)
13761 else:
13762 # All groups except those used by the instance are potential targets
13763 self.target_uuids = owned_groups - inst_groups
13765 conflicting_groups = self.target_uuids & inst_groups
13766 if conflicting_groups:
13767 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13768 " used by the instance '%s'" %
13769 (utils.CommaJoin(conflicting_groups),
13770 self.op.instance_name),
13771 errors.ECODE_INVAL)
13773 if not self.target_uuids:
13774 raise errors.OpPrereqError("There are no possible target groups",
13775 errors.ECODE_INVAL)
13777 def BuildHooksEnv(self):
13778 """Build hooks env.
13781 assert self.target_uuids
13784 "TARGET_GROUPS": " ".join(self.target_uuids),
13787 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13791 def BuildHooksNodes(self):
13792 """Build hooks nodes.
13795 mn = self.cfg.GetMasterNode()
13796 return ([mn], [mn])
13798 def Exec(self, feedback_fn):
13799 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13801 assert instances == [self.op.instance_name], "Instance not locked"
13803 req = iallocator.IAReqGroupChange(instances=instances,
13804 target_groups=list(self.target_uuids))
13805 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13807 ial.Run(self.op.iallocator)
13809 if not ial.success:
13810 raise errors.OpPrereqError("Can't compute solution for changing group of"
13811 " instance '%s' using iallocator '%s': %s" %
13812 (self.op.instance_name, self.op.iallocator,
13813 ial.info), errors.ECODE_NORES)
13815 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13817 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13818 " instance '%s'", len(jobs), self.op.instance_name)
13820 return ResultWithJobs(jobs)
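# Illustrative sketch (added, hypothetical names): the LU itself only asks the
# iallocator for a plan and returns the resulting jobs via ResultWithJobs; the
# submitted jobs then perform the actual instance moves.
#
#   op = opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
#                                      target_groups=["group2"])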
13823 class LUBackupQuery(NoHooksLU):
13824 """Query the exports list
13829 def CheckArguments(self):
13830 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13831 ["node", "export"], self.op.use_locking)
13833 def ExpandNames(self):
13834 self.expq.ExpandNames(self)
13836 def DeclareLocks(self, level):
13837 self.expq.DeclareLocks(self, level)
13839 def Exec(self, feedback_fn):
13840 result = {}
13842 for (node, expname) in self.expq.OldStyleQuery(self):
13843 if expname is None:
13844 result[node] = False
13845 else:
13846 result.setdefault(node, []).append(expname)
13848 return result
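# The old-style result built above is a dict keyed by node name: False when
# the node could not be queried, otherwise the list of export names, e.g.
# (hypothetical):
#
#   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}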
13851 class _ExportQuery(_QueryBase):
13852 FIELDS = query.EXPORT_FIELDS
13854 #: The node name is not a unique key for this query
13855 SORT_FIELD = "node"
13857 def ExpandNames(self, lu):
13858 lu.needed_locks = {}
13860 # The following variables interact with _QueryBase._GetNames
13861 if self.names:
13862 self.wanted = _GetWantedNodes(lu, self.names)
13863 else:
13864 self.wanted = locking.ALL_SET
13866 self.do_locking = self.use_locking
13868 if self.do_locking:
13869 lu.share_locks = _ShareAll()
13870 lu.needed_locks = {
13871 locking.LEVEL_NODE: self.wanted,
13872 }
13874 def DeclareLocks(self, lu, level):
13875 pass
13877 def _GetQueryData(self, lu):
13878 """Computes the list of nodes and their attributes.
13881 # Locking is not used
13883 assert not (compat.any(lu.glm.is_owned(level)
13884 for level in locking.LEVELS
13885 if level != locking.LEVEL_CLUSTER) or
13886 self.do_locking or self.use_locking)
13888 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13890 result = []
13892 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13893 if nres.fail_msg:
13894 result.append((node, None))
13895 else:
13896 result.extend((node, expname) for expname in nres.payload)
13898 return result
13901 class LUBackupPrepare(NoHooksLU):
13902 """Prepares an instance for an export and returns useful information.
13907 def ExpandNames(self):
13908 self._ExpandAndLockInstance()
13910 def CheckPrereq(self):
13911 """Check prerequisites.
13914 instance_name = self.op.instance_name
13916 self.instance = self.cfg.GetInstanceInfo(instance_name)
13917 assert self.instance is not None, \
13918 "Cannot retrieve locked instance %s" % self.op.instance_name
13919 _CheckNodeOnline(self, self.instance.primary_node)
13921 self._cds = _GetClusterDomainSecret()
13923 def Exec(self, feedback_fn):
13924 """Prepares an instance for an export.
13927 instance = self.instance
13929 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13930 salt = utils.GenerateSecret(8)
13932 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13933 result = self.rpc.call_x509_cert_create(instance.primary_node,
13934 constants.RIE_CERT_VALIDITY)
13935 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13937 (name, cert_pem) = result.payload
13939 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13940 cert_pem)
13942 return {
13943 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13944 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13945 salt),
13946 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13947 }
13949 return None
13952 class LUBackupExport(LogicalUnit):
13953 """Export an instance to an image in the cluster.
13956 HPATH = "instance-export"
13957 HTYPE = constants.HTYPE_INSTANCE
13958 REQ_BGL = False
13960 def CheckArguments(self):
13961 """Check the arguments.
13964 self.x509_key_name = self.op.x509_key_name
13965 self.dest_x509_ca_pem = self.op.destination_x509_ca
13967 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13968 if not self.x509_key_name:
13969 raise errors.OpPrereqError("Missing X509 key name for encryption",
13970 errors.ECODE_INVAL)
13972 if not self.dest_x509_ca_pem:
13973 raise errors.OpPrereqError("Missing destination X509 CA",
13974 errors.ECODE_INVAL)
13976 def ExpandNames(self):
13977 self._ExpandAndLockInstance()
13979 # Lock all nodes for local exports
13980 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13981 # FIXME: lock only instance primary and destination node
13983 # Sad but true, for now we have to lock all nodes, as we don't know where
13984 # the previous export might be, and in this LU we search for it and
13985 # remove it from its current node. In the future we could fix this by:
13986 # - making a tasklet to search (share-lock all), then create the
13987 # new one, then one to remove, after
13988 # - removing the removal operation altogether
13989 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13991 def DeclareLocks(self, level):
13992 """Last minute lock declaration."""
13993 # All nodes are locked anyway, so nothing to do here.
13995 def BuildHooksEnv(self):
13996 """Build hooks env.
13998 This will run on the master, primary node and target node.
14000 """
14001 env = {
14002 "EXPORT_MODE": self.op.mode,
14003 "EXPORT_NODE": self.op.target_node,
14004 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14005 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14006 # TODO: Generic function for boolean env variables
14007 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14010 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14014 def BuildHooksNodes(self):
14015 """Build hooks nodes.
14018 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14020 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14021 nl.append(self.op.target_node)
14023 return (nl, nl)
14025 def CheckPrereq(self):
14026 """Check prerequisites.
14028 This checks that the instance and node names are valid.
14031 instance_name = self.op.instance_name
14033 self.instance = self.cfg.GetInstanceInfo(instance_name)
14034 assert self.instance is not None, \
14035 "Cannot retrieve locked instance %s" % self.op.instance_name
14036 _CheckNodeOnline(self, self.instance.primary_node)
14038 if (self.op.remove_instance and
14039 self.instance.admin_state == constants.ADMINST_UP and
14040 not self.op.shutdown):
14041 raise errors.OpPrereqError("Can not remove instance without shutting it"
14042 " down before", errors.ECODE_STATE)
14044 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14045 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14046 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14047 assert self.dst_node is not None
14049 _CheckNodeOnline(self, self.dst_node.name)
14050 _CheckNodeNotDrained(self, self.dst_node.name)
14052 self._cds = None
14053 self.dest_disk_info = None
14054 self.dest_x509_ca = None
14056 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14057 self.dst_node = None
14059 if len(self.op.target_node) != len(self.instance.disks):
14060 raise errors.OpPrereqError(("Received destination information for %s"
14061 " disks, but instance %s has %s disks") %
14062 (len(self.op.target_node), instance_name,
14063 len(self.instance.disks)),
14064 errors.ECODE_INVAL)
14066 cds = _GetClusterDomainSecret()
14068 # Check X509 key name
14069 try:
14070 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14071 except (TypeError, ValueError), err:
14072 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14073 errors.ECODE_INVAL)
14075 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14076 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14077 errors.ECODE_INVAL)
14079 # Load and verify CA
14080 try:
14081 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14082 except OpenSSL.crypto.Error, err:
14083 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14084 (err, ), errors.ECODE_INVAL)
14086 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14087 if errcode is not None:
14088 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14089 (msg, ), errors.ECODE_INVAL)
14091 self.dest_x509_ca = cert
14093 # Verify target information
14094 disk_info = []
14095 for idx, disk_data in enumerate(self.op.target_node):
14096 try:
14097 (host, port, magic) = \
14098 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14099 except errors.GenericError, err:
14100 raise errors.OpPrereqError("Target info for disk %s: %s" %
14101 (idx, err), errors.ECODE_INVAL)
14103 disk_info.append((host, port, magic))
14105 assert len(disk_info) == len(self.op.target_node)
14106 self.dest_disk_info = disk_info
14108 else:
14109 raise errors.ProgrammerError("Unhandled export mode %r" %
14110 self.op.mode)
14112 # instance disk type verification
14113 # TODO: Implement export support for file-based disks
14114 for disk in self.instance.disks:
14115 if disk.dev_type == constants.LD_FILE:
14116 raise errors.OpPrereqError("Export not supported for instances with"
14117 " file-based disks", errors.ECODE_INVAL)
14119 def _CleanupExports(self, feedback_fn):
14120 """Removes exports of current instance from all other nodes.
14122 If an instance in a cluster with nodes A..D was exported to node C, its
14123 exports will be removed from the nodes A, B and D.
14126 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14128 nodelist = self.cfg.GetNodeList()
14129 nodelist.remove(self.dst_node.name)
14131 # on one-node clusters nodelist will be empty after the removal
14132 # if we proceed the backup would be removed because OpBackupQuery
14133 # substitutes an empty list with the full cluster node list.
14134 iname = self.instance.name
14136 feedback_fn("Removing old exports for instance %s" % iname)
14137 exportlist = self.rpc.call_export_list(nodelist)
14138 for node in exportlist:
14139 if exportlist[node].fail_msg:
14140 continue
14141 if iname in exportlist[node].payload:
14142 msg = self.rpc.call_export_remove(node, iname).fail_msg
14143 if msg:
14144 self.LogWarning("Could not remove older export for instance %s"
14145 " on node %s: %s", iname, node, msg)
14147 def Exec(self, feedback_fn):
14148 """Export an instance to an image in the cluster.
14151 assert self.op.mode in constants.EXPORT_MODES
14153 instance = self.instance
14154 src_node = instance.primary_node
14156 if self.op.shutdown:
14157 # shutdown the instance, but not the disks
14158 feedback_fn("Shutting down instance %s" % instance.name)
14159 result = self.rpc.call_instance_shutdown(src_node, instance,
14160 self.op.shutdown_timeout)
14161 # TODO: Maybe ignore failures if ignore_remove_failures is set
14162 result.Raise("Could not shutdown instance %s on"
14163 " node %s" % (instance.name, src_node))
14165 # set the disks ID correctly since call_instance_start needs the
14166 # correct drbd minor to create the symlinks
14167 for disk in instance.disks:
14168 self.cfg.SetDiskID(disk, src_node)
14170 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14172 if activate_disks:
14173 # Activate the instance disks if we're exporting a stopped instance
14174 feedback_fn("Activating disks for %s" % instance.name)
14175 _StartInstanceDisks(self, instance, None)
14177 try:
14178 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14179 instance)
14181 helper.CreateSnapshots()
14182 try:
14183 if (self.op.shutdown and
14184 instance.admin_state == constants.ADMINST_UP and
14185 not self.op.remove_instance):
14186 assert not activate_disks
14187 feedback_fn("Starting instance %s" % instance.name)
14188 result = self.rpc.call_instance_start(src_node,
14189 (instance, None, None), False)
14190 msg = result.fail_msg
14191 if msg:
14192 feedback_fn("Failed to start instance: %s" % msg)
14193 _ShutdownInstanceDisks(self, instance)
14194 raise errors.OpExecError("Could not start instance: %s" % msg)
14196 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14197 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14198 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14199 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14200 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14202 (key_name, _, _) = self.x509_key_name
14204 dest_ca_pem = \
14205 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14206 self.dest_x509_ca)
14208 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14209 key_name, dest_ca_pem,
14210 timeouts)
14214 # Check for backwards compatibility
14215 assert len(dresults) == len(instance.disks)
14216 assert compat.all(isinstance(i, bool) for i in dresults), \
14217 "Not all results are boolean: %r" % dresults
14221 feedback_fn("Deactivating disks for %s" % instance.name)
14222 _ShutdownInstanceDisks(self, instance)
14224 if not (compat.all(dresults) and fin_resu):
14225 failures = []
14226 if not fin_resu:
14227 failures.append("export finalization")
14228 if not compat.all(dresults):
14229 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14230 if not dsk)
14231 failures.append("disk export: disk(s) %s" % fdsk)
14233 raise errors.OpExecError("Export failed, errors in %s" %
14234 utils.CommaJoin(failures))
14236 # At this point, the export was successful, we can cleanup/finish
14238 # Remove instance if requested
14239 if self.op.remove_instance:
14240 feedback_fn("Removing instance %s" % instance.name)
14241 _RemoveInstance(self, feedback_fn, instance,
14242 self.op.ignore_remove_failures)
14244 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14245 self._CleanupExports(feedback_fn)
14247 return fin_resu, dresults
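# The value returned above is (fin_resu, dresults): a boolean for export
# finalization plus one boolean per instance disk.  A fully successful export
# of a two-disk instance would therefore return (hypothetically):
#
#   (True, [True, True])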
14250 class LUBackupRemove(NoHooksLU):
14251 """Remove exports related to the named instance.
14256 def ExpandNames(self):
14257 self.needed_locks = {}
14258 # We need all nodes to be locked in order for RemoveExport to work, but we
14259 # don't need to lock the instance itself, as nothing will happen to it (and
14260 # we can remove exports also for a removed instance)
14261 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14263 def Exec(self, feedback_fn):
14264 """Remove any export.
14267 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14268 # If the instance was not found we'll try with the name that was passed in.
14269 # This will only work if it was an FQDN, though.
14270 fqdn_warn = False
14271 if not instance_name:
14272 fqdn_warn = True
14273 instance_name = self.op.instance_name
14275 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14276 exportlist = self.rpc.call_export_list(locked_nodes)
14277 found = False
14278 for node in exportlist:
14279 msg = exportlist[node].fail_msg
14280 if msg:
14281 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14282 continue
14283 if instance_name in exportlist[node].payload:
14284 found = True
14285 result = self.rpc.call_export_remove(node, instance_name)
14286 msg = result.fail_msg
14287 if msg:
14288 logging.error("Could not remove export for instance %s"
14289 " on node %s: %s", instance_name, node, msg)
14291 if fqdn_warn and not found:
14292 feedback_fn("Export not found. If trying to remove an export belonging"
14293 " to a deleted instance please use its Fully Qualified"
14297 class LUGroupAdd(LogicalUnit):
14298 """Logical unit for creating node groups.
14301 HPATH = "group-add"
14302 HTYPE = constants.HTYPE_GROUP
14303 REQ_BGL = False
14305 def ExpandNames(self):
14306 # We need the new group's UUID here so that we can create and acquire the
14307 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14308 # that it should not check whether the UUID exists in the configuration.
14309 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14310 self.needed_locks = {}
14311 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14313 def CheckPrereq(self):
14314 """Check prerequisites.
14316 This checks that the given group name is not an existing node group
14317 already.
14319 """
14320 try:
14321 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14322 except errors.OpPrereqError:
14323 pass
14324 else:
14325 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14326 " node group (UUID: %s)" %
14327 (self.op.group_name, existing_uuid),
14328 errors.ECODE_EXISTS)
14330 if self.op.ndparams:
14331 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14333 if self.op.hv_state:
14334 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14335 else:
14336 self.new_hv_state = None
14338 if self.op.disk_state:
14339 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14340 else:
14341 self.new_disk_state = None
14343 if self.op.diskparams:
14344 for templ in constants.DISK_TEMPLATES:
14345 if templ in self.op.diskparams:
14346 utils.ForceDictType(self.op.diskparams[templ],
14347 constants.DISK_DT_TYPES)
14348 self.new_diskparams = self.op.diskparams
14349 try:
14350 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14351 except errors.OpPrereqError, err:
14352 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14353 errors.ECODE_INVAL)
14354 else:
14355 self.new_diskparams = {}
14357 if self.op.ipolicy:
14358 cluster = self.cfg.GetClusterInfo()
14359 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14360 try:
14361 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14362 except errors.ConfigurationError, err:
14363 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14364 errors.ECODE_INVAL)
14366 def BuildHooksEnv(self):
14367 """Build hooks env.
14371 "GROUP_NAME": self.op.group_name,
14374 def BuildHooksNodes(self):
14375 """Build hooks nodes.
14378 mn = self.cfg.GetMasterNode()
14379 return ([mn], [mn])
14381 def Exec(self, feedback_fn):
14382 """Add the node group to the cluster.
14385 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14386 uuid=self.group_uuid,
14387 alloc_policy=self.op.alloc_policy,
14388 ndparams=self.op.ndparams,
14389 diskparams=self.new_diskparams,
14390 ipolicy=self.op.ipolicy,
14391 hv_state_static=self.new_hv_state,
14392 disk_state_static=self.new_disk_state)
14394 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14395 del self.remove_locks[locking.LEVEL_NODEGROUP]
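# Illustrative sketch (added; names and values hypothetical) of requests
# handled by LUGroupAdd:
#
#   gnt-group add --node-parameters exclusive_storage=true group2
#
# or, at the opcode level:
#
#   opcodes.OpGroupAdd(group_name="group2",
#                      alloc_policy=constants.ALLOC_POLICY_PREFERRED)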
14398 class LUGroupAssignNodes(NoHooksLU):
14399 """Logical unit for assigning nodes to groups.
14404 def ExpandNames(self):
14405 # These raise errors.OpPrereqError on their own:
14406 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14407 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14409 # We want to lock all the affected nodes and groups. We have readily
14410 # available the list of nodes, and the *destination* group. To gather the
14411 # list of "source" groups, we need to fetch node information later on.
14412 self.needed_locks = {
14413 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14414 locking.LEVEL_NODE: self.op.nodes,
14415 }
14417 def DeclareLocks(self, level):
14418 if level == locking.LEVEL_NODEGROUP:
14419 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14421 # Try to get all affected nodes' groups without having the group or node
14422 # lock yet. Needs verification later in the code flow.
14423 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14425 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14427 def CheckPrereq(self):
14428 """Check prerequisites.
14431 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14432 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14433 frozenset(self.op.nodes))
14435 expected_locks = (set([self.group_uuid]) |
14436 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14437 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14438 if actual_locks != expected_locks:
14439 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14440 " current groups are '%s', used to be '%s'" %
14441 (utils.CommaJoin(expected_locks),
14442 utils.CommaJoin(actual_locks)))
14444 self.node_data = self.cfg.GetAllNodesInfo()
14445 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14446 instance_data = self.cfg.GetAllInstancesInfo()
14448 if self.group is None:
14449 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14450 (self.op.group_name, self.group_uuid))
14452 (new_splits, previous_splits) = \
14453 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14454 for node in self.op.nodes],
14455 self.node_data, instance_data)
14457 if new_splits:
14458 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14460 if not self.op.force:
14461 raise errors.OpExecError("The following instances get split by this"
14462 " change and --force was not given: %s" %
14463 fmt_new_splits)
14464 else:
14465 self.LogWarning("This operation will split the following instances: %s",
14466 fmt_new_splits)
14468 if previous_splits:
14469 self.LogWarning("In addition, these already-split instances continue"
14470 " to be split across groups: %s",
14471 utils.CommaJoin(utils.NiceSort(previous_splits)))
14473 def Exec(self, feedback_fn):
14474 """Assign nodes to a new group.
14477 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14479 self.cfg.AssignGroupNodes(mods)
14481 @staticmethod
14482 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14483 """Check for split instances after a node assignment.
14485 This method considers a series of node assignments as an atomic operation,
14486 and returns information about split instances after applying the set of
14487 changes.
14489 In particular, it returns information about newly split instances, and
14490 instances that were already split, and remain so after the change.
14492 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14493 considered.
14495 @type changes: list of (node_name, new_group_uuid) pairs.
14496 @param changes: list of node assignments to consider.
14497 @param node_data: a dict with data for all nodes
14498 @param instance_data: a dict with all instances to consider
14499 @rtype: a two-tuple
14500 @return: a list of instances that were previously okay and result split as a
14501 consequence of this change, and a list of instances that were previously
14502 split and this change does not fix.
14504 """
14505 changed_nodes = dict((node, group) for node, group in changes
14506 if node_data[node].group != group)
14508 all_split_instances = set()
14509 previously_split_instances = set()
14511 def InstanceNodes(instance):
14512 return [instance.primary_node] + list(instance.secondary_nodes)
14514 for inst in instance_data.values():
14515 if inst.disk_template not in constants.DTS_INT_MIRROR:
14516 continue
14518 instance_nodes = InstanceNodes(inst)
14520 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14521 previously_split_instances.add(inst.name)
14523 if len(set(changed_nodes.get(node, node_data[node].group)
14524 for node in instance_nodes)) > 1:
14525 all_split_instances.add(inst.name)
14527 return (list(all_split_instances - previously_split_instances),
14528 list(previously_split_instances & all_split_instances))
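# --- Editor's note: illustrative sketch, not part of the original module. ---
# The split check above only needs plain mappings, so it can be exercised in
# isolation; all names and data below are invented for the example.
#
#   node_group = {"node1": "grp1", "node2": "grp1", "node3": "grp2"}
#   instance_nodes = {"inst1": ["node1", "node2"],   # mirrored within grp1
#                     "inst2": ["node2", "node3"]}   # already split
#   changes = {"node2": "grp2"}                      # proposed reassignment
#
#   def groups(nodes, overrides=None):
#     overrides = overrides or {}
#     return set(overrides.get(n, node_group[n]) for n in nodes)
#
#   new_splits = [i for i, nodes in instance_nodes.items()
#                 if len(groups(nodes)) == 1 and len(groups(nodes, changes)) > 1]
#   already_split = [i for i, nodes in instance_nodes.items()
#                    if len(groups(nodes)) > 1]
#   # new_splits == ["inst1"], already_split == ["inst2"]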
14531 class _GroupQuery(_QueryBase):
14532 FIELDS = query.GROUP_FIELDS
14534 def ExpandNames(self, lu):
14535 lu.needed_locks = {}
14537 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14538 self._cluster = lu.cfg.GetClusterInfo()
14539 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14541 if not self.names:
14542 self.wanted = [name_to_uuid[name]
14543 for name in utils.NiceSort(name_to_uuid.keys())]
14544 else:
14545 # Accept names to be either names or UUIDs.
14546 missing = []
14547 self.wanted = []
14548 all_uuid = frozenset(self._all_groups.keys())
14550 for name in self.names:
14551 if name in all_uuid:
14552 self.wanted.append(name)
14553 elif name in name_to_uuid:
14554 self.wanted.append(name_to_uuid[name])
14555 else:
14556 missing.append(name)
14558 if missing:
14559 raise errors.OpPrereqError("Some groups do not exist: %s" %
14560 utils.CommaJoin(missing),
14561 errors.ECODE_NOENT)
14563 def DeclareLocks(self, lu, level):
14564 pass
14566 def _GetQueryData(self, lu):
14567 """Computes the list of node groups and their attributes.
14570 do_nodes = query.GQ_NODE in self.requested_data
14571 do_instances = query.GQ_INST in self.requested_data
14573 group_to_nodes = None
14574 group_to_instances = None
14576 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14577 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14578 # latter GetAllInstancesInfo() is not enough, for we have to go through
14579 # instance->node. Hence, we will need to process nodes even if we only need
14580 # instance information.
14581 if do_nodes or do_instances:
14582 all_nodes = lu.cfg.GetAllNodesInfo()
14583 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14584 node_to_group = {}
14586 for node in all_nodes.values():
14587 if node.group in group_to_nodes:
14588 group_to_nodes[node.group].append(node.name)
14589 node_to_group[node.name] = node.group
14591 if do_instances:
14592 all_instances = lu.cfg.GetAllInstancesInfo()
14593 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14595 for instance in all_instances.values():
14596 node = instance.primary_node
14597 if node in node_to_group:
14598 group_to_instances[node_to_group[node]].append(instance.name)
14600 if not do_nodes:
14601 # Do not pass on node information if it was not requested.
14602 group_to_nodes = None
14604 return query.GroupQueryData(self._cluster,
14605 [self._all_groups[uuid]
14606 for uuid in self.wanted],
14607 group_to_nodes, group_to_instances,
14608 query.GQ_DISKPARAMS in self.requested_data)
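# --- Editor's note: illustrative sketch, not part of the original module. ---
# The node/instance maps built above reduce to two dictionary passes; toy
# data, names invented for the example.
#
#   nodes = {"n1": "g1", "n2": "g1", "n3": "g2"}      # node -> group UUID
#   instances = {"i1": "n1", "i2": "n3"}              # instance -> primary node
#   wanted = ["g1", "g2"]
#
#   group_to_nodes = dict((g, []) for g in wanted)
#   node_to_group = {}
#   for node, group in nodes.items():
#     if group in group_to_nodes:
#       group_to_nodes[group].append(node)
#       node_to_group[node] = group
#
#   group_to_instances = dict((g, []) for g in wanted)
#   for inst, pnode in instances.items():
#     if pnode in node_to_group:
#       group_to_instances[node_to_group[pnode]].append(inst)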
14611 class LUGroupQuery(NoHooksLU):
14612 """Logical unit for querying node groups.
14617 def CheckArguments(self):
14618 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14619 self.op.output_fields, False)
14621 def ExpandNames(self):
14622 self.gq.ExpandNames(self)
14624 def DeclareLocks(self, level):
14625 self.gq.DeclareLocks(self, level)
14627 def Exec(self, feedback_fn):
14628 return self.gq.OldStyleQuery(self)
14631 class LUGroupSetParams(LogicalUnit):
14632 """Modifies the parameters of a node group.
14635 HPATH = "group-modify"
14636 HTYPE = constants.HTYPE_GROUP
14639 def CheckArguments(self):
14640 all_changes = [
14641 self.op.ndparams,
14642 self.op.diskparams,
14643 self.op.alloc_policy,
14644 self.op.hv_state,
14645 self.op.disk_state,
14646 self.op.ipolicy,
14647 ]
14649 if all_changes.count(None) == len(all_changes):
14650 raise errors.OpPrereqError("Please pass at least one modification",
14651 errors.ECODE_INVAL)
14653 def ExpandNames(self):
14654 # This raises errors.OpPrereqError on its own:
14655 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14657 self.needed_locks = {
14658 locking.LEVEL_INSTANCE: [],
14659 locking.LEVEL_NODEGROUP: [self.group_uuid],
14660 }
14662 self.share_locks[locking.LEVEL_INSTANCE] = 1
14664 def DeclareLocks(self, level):
14665 if level == locking.LEVEL_INSTANCE:
14666 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14668 # Lock instances optimistically, needs verification once group lock has
14669 # been acquired
14670 self.needed_locks[locking.LEVEL_INSTANCE] = \
14671 self.cfg.GetNodeGroupInstances(self.group_uuid)
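# --- Editor's note: illustrative sketch, not part of the original module. ---
# The "lock optimistically, verify in CheckPrereq" pattern used here amounts
# to re-reading the group membership after the locks are held and comparing
# it with what was locked. A minimal standalone rendition with invented
# helper names:
#
#   def verify_optimistic_locks(locked_instances, current_group_instances):
#     """Raise if the group's instance list changed between lock acquisition
#     and verification."""
#     wanted = frozenset(current_group_instances)
#     owned = frozenset(locked_instances)
#     if wanted != owned:
#       raise RuntimeError("Instances changed since locks were acquired:"
#                          " missing %s, unexpected %s" %
#                          (sorted(wanted - owned), sorted(owned - wanted)))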
14673 @staticmethod
14674 def _UpdateAndVerifyDiskParams(old, new):
14675 """Updates and verifies disk parameters.
14677 """
14678 new_params = _GetUpdatedParams(old, new)
14679 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14680 return new_params
14682 def CheckPrereq(self):
14683 """Check prerequisites.
14686 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14688 # Check if locked instances are still correct
14689 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14691 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14692 cluster = self.cfg.GetClusterInfo()
14694 if self.group is None:
14695 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14696 (self.op.group_name, self.group_uuid))
14698 if self.op.ndparams:
14699 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14700 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14701 self.new_ndparams = new_ndparams
14703 if self.op.diskparams:
14704 diskparams = self.group.diskparams
14705 uavdp = self._UpdateAndVerifyDiskParams
14706 # For each disktemplate subdict update and verify the values
14707 new_diskparams = dict((dt,
14708 uavdp(diskparams.get(dt, {}),
14709 self.op.diskparams[dt]))
14710 for dt in constants.DISK_TEMPLATES
14711 if dt in self.op.diskparams)
14712 # Now that all subdicts of diskparams are ready, merge the existing
14713 # dict with all updated subdicts
14714 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14715 try:
14716 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14717 except errors.OpPrereqError, err:
14718 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14719 errors.ECODE_INVAL)
14721 if self.op.hv_state:
14722 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14723 self.group.hv_state_static)
14725 if self.op.disk_state:
14726 self.new_disk_state = \
14727 _MergeAndVerifyDiskState(self.op.disk_state,
14728 self.group.disk_state_static)
14730 if self.op.ipolicy:
14731 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14732 self.op.ipolicy,
14733 group_policy=True)
14735 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14736 inst_filter = lambda inst: inst.name in owned_instances
14737 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14738 gmi = ganeti.masterd.instance
14739 violations = \
14740 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14741 self.group),
14742 new_ipolicy, instances)
14744 if violations:
14745 self.LogWarning("After the ipolicy change the following instances"
14746 " violate them: %s",
14747 utils.CommaJoin(violations))
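# --- Editor's note: illustrative sketch, not part of the original module. ---
# The diskparams handling above merges one sub-dict per disk template, then
# overlays the result on the existing settings (objects.FillDict behaves like
# dict.update on a copy). With plain dicts and invented values:
#
#   old = {"drbd": {"resync-rate": 1024, "barriers": "n"}, "plain": {}}
#   new = {"drbd": {"resync-rate": 4096}}
#
#   merged_subdicts = dict((dt, dict(old.get(dt, {}), **new[dt]))
#                          for dt in new)
#   result = dict(old, **merged_subdicts)
#   # result["drbd"] == {"resync-rate": 4096, "barriers": "n"}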
14749 def BuildHooksEnv(self):
14750 """Build hooks env.
14752 """
14753 return {
14754 "GROUP_NAME": self.op.group_name,
14755 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14756 }
14758 def BuildHooksNodes(self):
14759 """Build hooks nodes.
14762 mn = self.cfg.GetMasterNode()
14763 return ([mn], [mn])
14765 def Exec(self, feedback_fn):
14766 """Modifies the node group.
14768 """
14769 result = []
14771 if self.op.ndparams:
14772 self.group.ndparams = self.new_ndparams
14773 result.append(("ndparams", str(self.group.ndparams)))
14775 if self.op.diskparams:
14776 self.group.diskparams = self.new_diskparams
14777 result.append(("diskparams", str(self.group.diskparams)))
14779 if self.op.alloc_policy:
14780 self.group.alloc_policy = self.op.alloc_policy
14782 if self.op.hv_state:
14783 self.group.hv_state_static = self.new_hv_state
14785 if self.op.disk_state:
14786 self.group.disk_state_static = self.new_disk_state
14788 if self.op.ipolicy:
14789 self.group.ipolicy = self.new_ipolicy
14791 self.cfg.Update(self.group, feedback_fn)
14793 return result
14795 class LUGroupRemove(LogicalUnit):
14796 HPATH = "group-remove"
14797 HTYPE = constants.HTYPE_GROUP
14800 def ExpandNames(self):
14801 # This raises errors.OpPrereqError on its own:
14802 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14803 self.needed_locks = {
14804 locking.LEVEL_NODEGROUP: [self.group_uuid],
14805 }
14807 def CheckPrereq(self):
14808 """Check prerequisites.
14810 This checks that the given group name exists as a node group, that it is
14811 empty (i.e., contains no nodes), and that it is not the last group of the
14812 cluster.
14814 """
14815 # Verify that the group is empty.
14816 group_nodes = [node.name
14817 for node in self.cfg.GetAllNodesInfo().values()
14818 if node.group == self.group_uuid]
14820 if group_nodes:
14821 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14822 " nodes: %s" %
14823 (self.op.group_name,
14824 utils.CommaJoin(utils.NiceSort(group_nodes))),
14825 errors.ECODE_STATE)
14827 # Verify the cluster would not be left group-less.
14828 if len(self.cfg.GetNodeGroupList()) == 1:
14829 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14830 " removed" % self.op.group_name,
14831 errors.ECODE_STATE)
14833 def BuildHooksEnv(self):
14834 """Build hooks env.
14836 """
14837 return {
14838 "GROUP_NAME": self.op.group_name,
14839 }
14841 def BuildHooksNodes(self):
14842 """Build hooks nodes.
14845 mn = self.cfg.GetMasterNode()
14846 return ([mn], [mn])
14848 def Exec(self, feedback_fn):
14849 """Remove the node group.
14852 try:
14853 self.cfg.RemoveNodeGroup(self.group_uuid)
14854 except errors.ConfigurationError:
14855 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14856 (self.op.group_name, self.group_uuid))
14858 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14861 class LUGroupRename(LogicalUnit):
14862 HPATH = "group-rename"
14863 HTYPE = constants.HTYPE_GROUP
14866 def ExpandNames(self):
14867 # This raises errors.OpPrereqError on its own:
14868 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14870 self.needed_locks = {
14871 locking.LEVEL_NODEGROUP: [self.group_uuid],
14872 }
14874 def CheckPrereq(self):
14875 """Check prerequisites.
14877 Ensures requested new name is not yet used.
14879 """
14880 try:
14881 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14882 except errors.OpPrereqError:
14883 pass
14884 else:
14885 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14886 " node group (UUID: %s)" %
14887 (self.op.new_name, new_name_uuid),
14888 errors.ECODE_EXISTS)
14890 def BuildHooksEnv(self):
14891 """Build hooks env.
14893 """
14894 return {
14895 "OLD_NAME": self.op.group_name,
14896 "NEW_NAME": self.op.new_name,
14897 }
14899 def BuildHooksNodes(self):
14900 """Build hooks nodes.
14903 mn = self.cfg.GetMasterNode()
14905 all_nodes = self.cfg.GetAllNodesInfo()
14906 all_nodes.pop(mn, None)
14908 run_nodes = [mn]
14909 run_nodes.extend(node.name for node in all_nodes.values()
14910 if node.group == self.group_uuid)
14912 return (run_nodes, run_nodes)
14914 def Exec(self, feedback_fn):
14915 """Rename the node group.
14918 group = self.cfg.GetNodeGroup(self.group_uuid)
14920 if group is None:
14921 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14922 (self.op.group_name, self.group_uuid))
14924 group.name = self.op.new_name
14925 self.cfg.Update(group, feedback_fn)
14927 return self.op.new_name
14930 class LUGroupEvacuate(LogicalUnit):
14931 HPATH = "group-evacuate"
14932 HTYPE = constants.HTYPE_GROUP
14935 def ExpandNames(self):
14936 # This raises errors.OpPrereqError on its own:
14937 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14939 if self.op.target_groups:
14940 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14941 self.op.target_groups)
14942 else:
14943 self.req_target_uuids = []
14945 if self.group_uuid in self.req_target_uuids:
14946 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14947 " as a target group (targets are %s)" %
14948 (self.group_uuid,
14949 utils.CommaJoin(self.req_target_uuids)),
14950 errors.ECODE_INVAL)
14952 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14954 self.share_locks = _ShareAll()
14955 self.needed_locks = {
14956 locking.LEVEL_INSTANCE: [],
14957 locking.LEVEL_NODEGROUP: [],
14958 locking.LEVEL_NODE: [],
14959 }
14961 def DeclareLocks(self, level):
14962 if level == locking.LEVEL_INSTANCE:
14963 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14965 # Lock instances optimistically, needs verification once node and group
14966 # locks have been acquired
14967 self.needed_locks[locking.LEVEL_INSTANCE] = \
14968 self.cfg.GetNodeGroupInstances(self.group_uuid)
14970 elif level == locking.LEVEL_NODEGROUP:
14971 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14973 if self.req_target_uuids:
14974 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14976 # Lock all groups used by instances optimistically; this requires going
14977 # via the node before it's locked, requiring verification later on
14978 lock_groups.update(group_uuid
14979 for instance_name in
14980 self.owned_locks(locking.LEVEL_INSTANCE)
14981 for group_uuid in
14982 self.cfg.GetInstanceNodeGroups(instance_name))
14983 else:
14984 # No target groups, need to lock all of them
14985 lock_groups = locking.ALL_SET
14987 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14989 elif level == locking.LEVEL_NODE:
14990 # This will only lock the nodes in the group to be evacuated which
14991 # contain actual instances
14992 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14993 self._LockInstancesNodes()
14995 # Lock all nodes in group to be evacuated and target groups
14996 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14997 assert self.group_uuid in owned_groups
14998 member_nodes = [node_name
14999 for group in owned_groups
15000 for node_name in self.cfg.GetNodeGroup(group).members]
15001 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15003 def CheckPrereq(self):
15004 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15005 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15006 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15008 assert owned_groups.issuperset(self.req_target_uuids)
15009 assert self.group_uuid in owned_groups
15011 # Check if locked instances are still correct
15012 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15014 # Get instance information
15015 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15017 # Check if node groups for locked instances are still correct
15018 _CheckInstancesNodeGroups(self.cfg, self.instances,
15019 owned_groups, owned_nodes, self.group_uuid)
15021 if self.req_target_uuids:
15022 # User requested specific target groups
15023 self.target_uuids = self.req_target_uuids
15024 else:
15025 # All groups except the one to be evacuated are potential targets
15026 self.target_uuids = [group_uuid for group_uuid in owned_groups
15027 if group_uuid != self.group_uuid]
15029 if not self.target_uuids:
15030 raise errors.OpPrereqError("There are no possible target groups",
15031 errors.ECODE_INVAL)
15033 def BuildHooksEnv(self):
15034 """Build hooks env.
15036 """
15037 return {
15038 "GROUP_NAME": self.op.group_name,
15039 "TARGET_GROUPS": " ".join(self.target_uuids),
15040 }
15042 def BuildHooksNodes(self):
15043 """Build hooks nodes.
15046 mn = self.cfg.GetMasterNode()
15048 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15050 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15052 return (run_nodes, run_nodes)
15054 def Exec(self, feedback_fn):
15055 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15057 assert self.group_uuid not in self.target_uuids
15059 req = iallocator.IAReqGroupChange(instances=instances,
15060 target_groups=self.target_uuids)
15061 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15063 ial.Run(self.op.iallocator)
15065 if not ial.success:
15066 raise errors.OpPrereqError("Can't compute group evacuation using"
15067 " iallocator '%s': %s" %
15068 (self.op.iallocator, ial.info),
15069 errors.ECODE_NORES)
15071 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15073 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15074 len(jobs), self.op.group_name)
15076 return ResultWithJobs(jobs)
15079 class TagsLU(NoHooksLU): # pylint: disable=W0223
15080 """Generic tags LU.
15082 This is an abstract class which is the parent of all the other tags LUs.
15085 def ExpandNames(self):
15086 self.group_uuid = None
15087 self.needed_locks = {}
15089 if self.op.kind == constants.TAG_NODE:
15090 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15091 lock_level = locking.LEVEL_NODE
15092 lock_name = self.op.name
15093 elif self.op.kind == constants.TAG_INSTANCE:
15094 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15095 lock_level = locking.LEVEL_INSTANCE
15096 lock_name = self.op.name
15097 elif self.op.kind == constants.TAG_NODEGROUP:
15098 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15099 lock_level = locking.LEVEL_NODEGROUP
15100 lock_name = self.group_uuid
15101 elif self.op.kind == constants.TAG_NETWORK:
15102 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15103 lock_level = locking.LEVEL_NETWORK
15104 lock_name = self.network_uuid
15105 else:
15106 lock_level = None
15107 lock_name = None
15109 if lock_level and getattr(self.op, "use_locking", True):
15110 self.needed_locks[lock_level] = lock_name
15112 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15113 # not possible to acquire the BGL based on opcode parameters)
15115 def CheckPrereq(self):
15116 """Check prerequisites.
15119 if self.op.kind == constants.TAG_CLUSTER:
15120 self.target = self.cfg.GetClusterInfo()
15121 elif self.op.kind == constants.TAG_NODE:
15122 self.target = self.cfg.GetNodeInfo(self.op.name)
15123 elif self.op.kind == constants.TAG_INSTANCE:
15124 self.target = self.cfg.GetInstanceInfo(self.op.name)
15125 elif self.op.kind == constants.TAG_NODEGROUP:
15126 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15127 elif self.op.kind == constants.TAG_NETWORK:
15128 self.target = self.cfg.GetNetwork(self.network_uuid)
15129 else:
15130 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15131 str(self.op.kind), errors.ECODE_INVAL)
15134 class LUTagsGet(TagsLU):
15135 """Returns the tags of a given object.
15140 def ExpandNames(self):
15141 TagsLU.ExpandNames(self)
15143 # Share locks as this is only a read operation
15144 self.share_locks = _ShareAll()
15146 def Exec(self, feedback_fn):
15147 """Returns the tag list.
15150 return list(self.target.GetTags())
15153 class LUTagsSearch(NoHooksLU):
15154 """Searches the tags for a given pattern.
15159 def ExpandNames(self):
15160 self.needed_locks = {}
15162 def CheckPrereq(self):
15163 """Check prerequisites.
15165 This checks the pattern passed for validity by compiling it.
15167 """
15168 try:
15169 self.re = re.compile(self.op.pattern)
15170 except re.error, err:
15171 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15172 (self.op.pattern, err), errors.ECODE_INVAL)
15174 def Exec(self, feedback_fn):
15175 """Returns the tag list.
15177 """
15178 cfg = self.cfg
15179 tgts = [("/cluster", cfg.GetClusterInfo())]
15180 ilist = cfg.GetAllInstancesInfo().values()
15181 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15182 nlist = cfg.GetAllNodesInfo().values()
15183 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15184 tgts.extend(("/nodegroup/%s" % n.name, n)
15185 for n in cfg.GetAllNodeGroupsInfo().values())
15186 results = []
15187 for path, target in tgts:
15188 for tag in target.GetTags():
15189 if self.re.search(tag):
15190 results.append((path, tag))
15192 return results
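# --- Editor's note: illustrative sketch, not part of the original module. ---
# The search walks every taggable object and keeps (path, tag) pairs whose
# tag matches the compiled pattern. A self-contained miniature with made-up
# tags:
#
#   import re
#   targets = [("/cluster", ["env:prod"]),
#              ("/instances/web1", ["env:prod", "role:frontend"]),
#              ("/nodes/node1", ["rack:a3"])]
#   pattern = re.compile(r"^env:")
#   matches = [(path, tag)
#              for path, tags in targets
#              for tag in tags
#              if pattern.search(tag)]
#   # matches == [("/cluster", "env:prod"), ("/instances/web1", "env:prod")]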
15194 class LUTagsSet(TagsLU):
15195 """Sets a tag on a given object.
15200 def CheckPrereq(self):
15201 """Check prerequisites.
15203 This checks the type and length of the tag name and value.
15206 TagsLU.CheckPrereq(self)
15207 for tag in self.op.tags:
15208 objects.TaggableObject.ValidateTag(tag)
15210 def Exec(self, feedback_fn):
15211 """Sets the tag.
15213 """
15214 try:
15215 for tag in self.op.tags:
15216 self.target.AddTag(tag)
15217 except errors.TagError, err:
15218 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15219 self.cfg.Update(self.target, feedback_fn)
15222 class LUTagsDel(TagsLU):
15223 """Delete a list of tags from a given object.
15228 def CheckPrereq(self):
15229 """Check prerequisites.
15231 This checks that we have the given tag.
15234 TagsLU.CheckPrereq(self)
15235 for tag in self.op.tags:
15236 objects.TaggableObject.ValidateTag(tag)
15237 del_tags = frozenset(self.op.tags)
15238 cur_tags = self.target.GetTags()
15240 diff_tags = del_tags - cur_tags
15241 if diff_tags:
15242 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15243 raise errors.OpPrereqError("Tag(s) %s not found" %
15244 (utils.CommaJoin(diff_names), ),
15245 errors.ECODE_NOENT)
15247 def Exec(self, feedback_fn):
15248 """Remove the tag from the object.
15251 for tag in self.op.tags:
15252 self.target.RemoveTag(tag)
15253 self.cfg.Update(self.target, feedback_fn)
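# --- Editor's note: illustrative sketch, not part of the original module. ---
# The prerequisite check for deletion is a plain set difference between the
# requested tags and the tags the object currently carries. With made-up
# tags:
#
#   current = set(["env:prod", "role:db"])
#   to_delete = frozenset(["role:db", "role:cache"])
#   missing = to_delete - current
#   if missing:
#     raise ValueError("Tag(s) %s not found" % ", ".join(sorted(missing)))
#   # here: raises, because "role:cache" is not set on the object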
15256 class LUTestDelay(NoHooksLU):
15257 """Sleep for a specified amount of time.
15259 This LU sleeps on the master and/or nodes for a specified amount of
15260 time.
15262 """
15263 REQ_BGL = False
15265 def ExpandNames(self):
15266 """Expand names and set required locks.
15268 This expands the node list, if any.
15271 self.needed_locks = {}
15272 if self.op.on_nodes:
15273 # _GetWantedNodes can be used here, but is not always appropriate to use
15274 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15275 # more information.
15276 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15277 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15279 def _TestDelay(self):
15280 """Do the actual sleep.
15283 if self.op.on_master:
15284 if not utils.TestDelay(self.op.duration):
15285 raise errors.OpExecError("Error during master delay test")
15286 if self.op.on_nodes:
15287 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15288 for node, node_result in result.items():
15289 node_result.Raise("Failure during rpc call to node %s" % node)
15291 def Exec(self, feedback_fn):
15292 """Execute the test delay opcode, with the wanted repetitions.
15295 if self.op.repeat == 0:
15296 self._TestDelay()
15297 else:
15298 top_value = self.op.repeat - 1
15299 for i in range(self.op.repeat):
15300 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15301 self._TestDelay()
15304 class LURestrictedCommand(NoHooksLU):
15305 """Logical unit for executing restricted commands.
15310 def ExpandNames(self):
15311 if self.op.nodes:
15312 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15314 self.needed_locks = {
15315 locking.LEVEL_NODE: self.op.nodes,
15316 }
15317 self.share_locks = {
15318 locking.LEVEL_NODE: not self.op.use_locking,
15319 }
15321 def CheckPrereq(self):
15322 """Check prerequisites.
15326 def Exec(self, feedback_fn):
15327 """Execute restricted command and return output.
15330 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15332 # Check if correct locks are held
15333 assert set(self.op.nodes).issubset(owned_nodes)
15335 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15337 result = []
15339 for node_name in self.op.nodes:
15340 nres = rpcres[node_name]
15341 if nres.fail_msg:
15342 msg = ("Command '%s' on node '%s' failed: %s" %
15343 (self.op.command, node_name, nres.fail_msg))
15344 result.append((False, msg))
15345 else:
15346 result.append((True, nres.payload))
15348 return result
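# --- Editor's note: illustrative sketch, not part of the original module. ---
# The result produced above is a list of (success, output-or-error) pairs,
# one per requested node and in the same order. A toy aggregation over
# invented RPC results:
#
#   rpc_results = {"node1": (None, "uptime 12 days"),      # (fail_msg, payload)
#                  "node2": ("connection refused", None)}
#   nodes = ["node1", "node2"]
#   result = []
#   for name in nodes:
#     fail_msg, payload = rpc_results[name]
#     if fail_msg:
#       result.append((False, "Command on node '%s' failed: %s" % (name, fail_msg)))
#     else:
#       result.append((True, payload))
#   # result == [(True, "uptime 12 days"),
#   #            (False, "Command on node 'node2' failed: connection refused")]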
15351 class LUTestJqueue(NoHooksLU):
15352 """Utility LU to test some aspects of the job queue.
15357 # Must be lower than default timeout for WaitForJobChange to see whether it
15358 # notices changed jobs
15359 _CLIENT_CONNECT_TIMEOUT = 20.0
15360 _CLIENT_CONFIRM_TIMEOUT = 60.0
15362 @classmethod
15363 def _NotifyUsingSocket(cls, cb, errcls):
15364 """Opens a Unix socket and waits for another program to connect.
15367 @param cb: Callback to send socket name to client
15368 @type errcls: class
15369 @param errcls: Exception class to use for errors
15372 # Using a temporary directory as there's no easy way to create temporary
15373 # sockets without writing a custom loop around tempfile.mktemp and
15374 # socket.bind
15375 tmpdir = tempfile.mkdtemp()
15376 try:
15377 tmpsock = utils.PathJoin(tmpdir, "sock")
15379 logging.debug("Creating temporary socket at %s", tmpsock)
15380 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15381 try:
15382 sock.bind(tmpsock)
15383 sock.listen(1)
15385 # Send details to client
15386 cb(tmpsock)
15388 # Wait for client to connect before continuing
15389 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15390 try:
15391 (conn, _) = sock.accept()
15392 except socket.error, err:
15393 raise errcls("Client didn't connect in time (%s)" % err)
15394 finally:
15395 sock.close()
15396 finally:
15397 # Remove as soon as client is connected
15398 shutil.rmtree(tmpdir)
15400 # Wait for client to close
15401 try:
15402 try:
15403 # pylint: disable=E1101
15404 # Instance of '_socketobject' has no ... member
15405 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15406 conn.recv(1)
15407 except socket.error, err:
15408 raise errcls("Client failed to confirm notification (%s)" % err)
15409 finally:
15410 conn.close()
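# --- Editor's note: illustrative sketch, not part of the original module. ---
# The notification helper above boils down to "create a throw-away Unix
# socket, hand its path to the client, then block until the client connects
# and again until it answers". A trimmed-down standalone version (error
# handling and cleanup omitted for brevity; names invented):
#
#   import os, socket, tempfile
#
#   def wait_for_client(notify_cb, connect_timeout=20.0, confirm_timeout=60.0):
#     tmpdir = tempfile.mkdtemp()
#     sock_path = os.path.join(tmpdir, "sock")
#     sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#     sock.bind(sock_path)
#     sock.listen(1)
#     notify_cb(sock_path)            # tell the client where to connect
#     sock.settimeout(connect_timeout)
#     conn, _ = sock.accept()         # raises socket.timeout if nobody shows up
#     conn.settimeout(confirm_timeout)
#     conn.recv(1)                    # wait for the client's confirmation byte
#     conn.close()
#     sock.close()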
15412 def _SendNotification(self, test, arg, sockname):
15413 """Sends a notification to the client.
15416 @param test: Test name
15417 @param arg: Test argument (depends on test)
15418 @type sockname: string
15419 @param sockname: Socket path
15422 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15424 def _Notify(self, prereq, test, arg):
15425 """Notifies the client of a test.
15428 @param prereq: Whether this is a prereq-phase test
15430 @param test: Test name
15431 @param arg: Test argument (depends on test)
15433 """
15434 if prereq:
15435 errcls = errors.OpPrereqError
15436 else:
15437 errcls = errors.OpExecError
15439 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15440 test, arg),
15441 errcls)
15443 def CheckArguments(self):
15444 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15445 self.expandnames_calls = 0
15447 def ExpandNames(self):
15448 checkargs_calls = getattr(self, "checkargs_calls", 0)
15449 if checkargs_calls < 1:
15450 raise errors.ProgrammerError("CheckArguments was not called")
15452 self.expandnames_calls += 1
15454 if self.op.notify_waitlock:
15455 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15457 self.LogInfo("Expanding names")
15459 # Get lock on master node (just to get a lock, not for a particular reason)
15460 self.needed_locks = {
15461 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15464 def Exec(self, feedback_fn):
15465 if self.expandnames_calls < 1:
15466 raise errors.ProgrammerError("ExpandNames was not called")
15468 if self.op.notify_exec:
15469 self._Notify(False, constants.JQT_EXEC, None)
15471 self.LogInfo("Executing")
15473 if self.op.log_messages:
15474 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15475 for idx, msg in enumerate(self.op.log_messages):
15476 self.LogInfo("Sending log message %s", idx + 1)
15477 feedback_fn(constants.JQT_MSGPREFIX + msg)
15478 # Report how many test messages have been sent
15479 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15481 if self.op.fail:
15482 raise errors.OpExecError("Opcode failure was requested")
15484 return True
15487 class LUTestAllocator(NoHooksLU):
15488 """Run allocator tests.
15490 This LU runs the allocator tests
15493 def CheckPrereq(self):
15494 """Check prerequisites.
15496 This checks the opcode parameters depending on the direction and mode test.
15499 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15500 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15501 for attr in ["memory", "disks", "disk_template",
15502 "os", "tags", "nics", "vcpus"]:
15503 if not hasattr(self.op, attr):
15504 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15505 attr, errors.ECODE_INVAL)
15506 iname = self.cfg.ExpandInstanceName(self.op.name)
15507 if iname is not None:
15508 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15509 iname, errors.ECODE_EXISTS)
15510 if not isinstance(self.op.nics, list):
15511 raise errors.OpPrereqError("Invalid parameter 'nics'",
15512 errors.ECODE_INVAL)
15513 if not isinstance(self.op.disks, list):
15514 raise errors.OpPrereqError("Invalid parameter 'disks'",
15515 errors.ECODE_INVAL)
15516 for row in self.op.disks:
15517 if (not isinstance(row, dict) or
15518 constants.IDISK_SIZE not in row or
15519 not isinstance(row[constants.IDISK_SIZE], int) or
15520 constants.IDISK_MODE not in row or
15521 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15522 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15523 " parameter", errors.ECODE_INVAL)
15524 if self.op.hypervisor is None:
15525 self.op.hypervisor = self.cfg.GetHypervisorType()
15526 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15527 fname = _ExpandInstanceName(self.cfg, self.op.name)
15528 self.op.name = fname
15529 self.relocate_from = \
15530 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15531 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15532 constants.IALLOCATOR_MODE_NODE_EVAC):
15533 if not self.op.instances:
15534 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15535 self.op.instances = _GetWantedInstances(self, self.op.instances)
15536 else:
15537 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15538 self.op.mode, errors.ECODE_INVAL)
15540 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15541 if self.op.iallocator is None:
15542 raise errors.OpPrereqError("Missing allocator name",
15543 errors.ECODE_INVAL)
15544 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15545 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15546 self.op.direction, errors.ECODE_INVAL)
15548 def Exec(self, feedback_fn):
15549 """Run the allocator test.
15552 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15553 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15554 memory=self.op.memory,
15555 disks=self.op.disks,
15556 disk_template=self.op.disk_template,
15557 os=self.op.os,
15558 tags=self.op.tags,
15559 nics=self.op.nics,
15560 vcpus=self.op.vcpus,
15561 spindle_use=self.op.spindle_use,
15562 hypervisor=self.op.hypervisor)
15563 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15564 req = iallocator.IAReqRelocate(name=self.op.name,
15565 relocate_from=list(self.relocate_from))
15566 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15567 req = iallocator.IAReqGroupChange(instances=self.op.instances,
15568 target_groups=self.op.target_groups)
15569 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15570 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15571 evac_mode=self.op.evac_mode)
15572 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15573 disk_template = self.op.disk_template
15574 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15575 memory=self.op.memory,
15576 disks=self.op.disks,
15577 disk_template=disk_template,
15578 os=self.op.os,
15579 tags=self.op.tags,
15580 nics=self.op.nics,
15581 vcpus=self.op.vcpus,
15582 spindle_use=self.op.spindle_use,
15583 hypervisor=self.op.hypervisor)
15584 for idx in range(self.op.count)]
15585 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15586 else:
15587 raise errors.ProgrammerError("Uncaught mode %s in"
15588 " LUTestAllocator.Exec", self.op.mode)
15590 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15591 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15592 result = ial.in_text
15593 else:
15594 ial.Run(self.op.iallocator, validate=False)
15595 result = ial.out_text
15597 return result
15599 class LUNetworkAdd(LogicalUnit):
15600 """Logical unit for creating networks.
15603 HPATH = "network-add"
15604 HTYPE = constants.HTYPE_NETWORK
15607 def BuildHooksNodes(self):
15608 """Build hooks nodes.
15611 mn = self.cfg.GetMasterNode()
15612 return ([mn], [mn])
15614 def ExpandNames(self):
15615 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15617 if self.op.conflicts_check:
15618 self.share_locks[locking.LEVEL_NODE] = 1
15619 self.needed_locks = {
15620 locking.LEVEL_NODE: locking.ALL_SET,
15621 }
15622 else:
15623 self.needed_locks = {}
15625 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15627 def CheckPrereq(self):
15628 """Check prerequisites.
15630 This checks that the given group name is not an existing node group
15634 if self.op.network is None:
15635 raise errors.OpPrereqError("Network must be given",
15636 errors.ECODE_INVAL)
15638 uuid = self.cfg.LookupNetwork(self.op.network_name)
15640 if uuid:
15641 raise errors.OpPrereqError("Network '%s' already defined" %
15642 self.op.network, errors.ECODE_EXISTS)
15644 if self.op.mac_prefix:
15645 utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15647 # Check tag validity
15648 for tag in self.op.tags:
15649 objects.TaggableObject.ValidateTag(tag)
15651 def BuildHooksEnv(self):
15652 """Build hooks env.
15654 """
15655 args = {
15656 "name": self.op.network_name,
15657 "subnet": self.op.network,
15658 "gateway": self.op.gateway,
15659 "network6": self.op.network6,
15660 "gateway6": self.op.gateway6,
15661 "mac_prefix": self.op.mac_prefix,
15662 "network_type": self.op.network_type,
15663 "tags": self.op.tags,
15664 }
15665 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15667 def Exec(self, feedback_fn):
15668 """Add the ip pool to the cluster.
15671 nobj = objects.Network(name=self.op.network_name,
15672 network=self.op.network,
15673 gateway=self.op.gateway,
15674 network6=self.op.network6,
15675 gateway6=self.op.gateway6,
15676 mac_prefix=self.op.mac_prefix,
15677 network_type=self.op.network_type,
15678 uuid=self.network_uuid,
15679 family=constants.IP4_VERSION)
15680 # Initialize the associated address pool
15681 try:
15682 pool = network.AddressPool.InitializeNetwork(nobj)
15683 except errors.AddressPoolError, e:
15684 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15686 # Check if we need to reserve the nodes and the cluster master IP
15687 # These may not be allocated to any instances in routed mode, as
15688 # they wouldn't function anyway.
15689 if self.op.conflicts_check:
15690 for node in self.cfg.GetAllNodesInfo().values():
15691 for ip in [node.primary_ip, node.secondary_ip]:
15692 try:
15693 if pool.Contains(ip):
15694 pool.Reserve(ip)
15695 self.LogInfo("Reserved IP address of node '%s' (%s)",
15696 node.name, ip)
15697 except errors.AddressPoolError:
15698 self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
15699 node.name, ip)
15701 master_ip = self.cfg.GetClusterInfo().master_ip
15702 try:
15703 if pool.Contains(master_ip):
15704 pool.Reserve(master_ip)
15705 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
15706 except errors.AddressPoolError:
15707 self.LogWarning("Cannot reserve cluster master IP address (%s)",
15708 master_ip)
15710 if self.op.add_reserved_ips:
15711 for ip in self.op.add_reserved_ips:
15712 try:
15713 pool.Reserve(ip, external=True)
15714 except errors.AddressPoolError, e:
15715 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15718 for tag in self.op.tags:
15719 nobj.AddTag(tag)
15721 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15722 del self.remove_locks[locking.LEVEL_NETWORK]
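# --- Editor's note: illustrative sketch, not part of the original module. ---
# The reservation logic above can be mimicked with the "ipaddress" module
# from the Python 3 standard library (a backport exists for Python 2) instead
# of network.AddressPool; names and addresses below are invented.
#
#   import ipaddress
#
#   subnet = ipaddress.ip_network(u"192.0.2.0/29")
#   reserved = set()
#
#   def reserve(ip):
#     addr = ipaddress.ip_address(ip)
#     if addr not in subnet:
#       raise ValueError("%s not in %s" % (ip, subnet))
#     if addr in reserved:
#       raise ValueError("%s already reserved" % ip)
#     reserved.add(addr)
#
#   reserve(u"192.0.2.1")   # e.g. the gateway or a node address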
15725 class LUNetworkRemove(LogicalUnit):
15726 HPATH = "network-remove"
15727 HTYPE = constants.HTYPE_NETWORK
15730 def ExpandNames(self):
15731 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15733 if not self.network_uuid:
15734 raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
15735 errors.ECODE_INVAL)
15737 self.share_locks[locking.LEVEL_NODEGROUP] = 1
15738 self.needed_locks = {
15739 locking.LEVEL_NETWORK: [self.network_uuid],
15740 locking.LEVEL_NODEGROUP: locking.ALL_SET,
15741 }
15743 def CheckPrereq(self):
15744 """Check prerequisites.
15746 This checks that the given network name exists as a network and that it
15747 is not connected to any node group (i.e., no node group has this network
15748 assigned to it).
15750 """
15752 # Verify that the network is not connected.
15753 node_groups = [group.name
15754 for group in self.cfg.GetAllNodeGroupsInfo().values()
15755 for net in group.networks.keys()
15756 if net == self.network_uuid]
15758 if node_groups:
15759 self.LogWarning("Network '%s' is connected to the following"
15760 " node groups: %s" % (self.op.network_name,
15761 utils.CommaJoin(utils.NiceSort(node_groups))))
15762 raise errors.OpPrereqError("Network still connected",
15763 errors.ECODE_STATE)
15765 def BuildHooksEnv(self):
15766 """Build hooks env.
15768 """
15769 return {
15770 "NETWORK_NAME": self.op.network_name,
15771 }
15773 def BuildHooksNodes(self):
15774 """Build hooks nodes.
15777 mn = self.cfg.GetMasterNode()
15778 return ([mn], [mn])
15780 def Exec(self, feedback_fn):
15781 """Remove the network.
15783 """
15784 try:
15785 self.cfg.RemoveNetwork(self.network_uuid)
15786 except errors.ConfigurationError:
15787 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15788 (self.op.network_name, self.network_uuid))
15791 class LUNetworkSetParams(LogicalUnit):
15792 """Modifies the parameters of a network.
15795 HPATH = "network-modify"
15796 HTYPE = constants.HTYPE_NETWORK
15799 def CheckArguments(self):
15800 if (self.op.gateway and
15801 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15802 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15803 " at once", errors.ECODE_INVAL)
15805 def ExpandNames(self):
15806 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15807 self.network = self.cfg.GetNetwork(self.network_uuid)
15808 if self.network is None:
15809 raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
15810 (self.op.network_name, self.network_uuid),
15811 errors.ECODE_INVAL)
15812 self.needed_locks = {
15813 locking.LEVEL_NETWORK: [self.network_uuid],
15814 }
15816 def CheckPrereq(self):
15817 """Check prerequisites.
15820 self.gateway = self.network.gateway
15821 self.network_type = self.network.network_type
15822 self.mac_prefix = self.network.mac_prefix
15823 self.network6 = self.network.network6
15824 self.gateway6 = self.network.gateway6
15825 self.tags = self.network.tags
15827 self.pool = network.AddressPool(self.network)
15829 if self.op.gateway:
15830 if self.op.gateway == constants.VALUE_NONE:
15831 self.gateway = None
15832 else:
15833 self.gateway = self.op.gateway
15834 if self.pool.IsReserved(self.gateway):
15835 raise errors.OpPrereqError("%s is already reserved" %
15836 self.gateway, errors.ECODE_INVAL)
15838 if self.op.network_type:
15839 if self.op.network_type == constants.VALUE_NONE:
15840 self.network_type = None
15841 else:
15842 self.network_type = self.op.network_type
15844 if self.op.mac_prefix:
15845 if self.op.mac_prefix == constants.VALUE_NONE:
15846 self.mac_prefix = None
15847 else:
15848 utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15849 self.mac_prefix = self.op.mac_prefix
15851 if self.op.gateway6:
15852 if self.op.gateway6 == constants.VALUE_NONE:
15853 self.gateway6 = None
15854 else:
15855 self.gateway6 = self.op.gateway6
15857 if self.op.network6:
15858 if self.op.network6 == constants.VALUE_NONE:
15859 self.network6 = None
15860 else:
15861 self.network6 = self.op.network6
15863 def BuildHooksEnv(self):
15864 """Build hooks env.
15866 """
15867 args = {
15868 "name": self.op.network_name,
15869 "subnet": self.network.network,
15870 "gateway": self.gateway,
15871 "network6": self.network6,
15872 "gateway6": self.gateway6,
15873 "mac_prefix": self.mac_prefix,
15874 "network_type": self.network_type,
15875 "tags": self.tags,
15876 }
15877 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15879 def BuildHooksNodes(self):
15880 """Build hooks nodes.
15883 mn = self.cfg.GetMasterNode()
15884 return ([mn], [mn])
15886 def Exec(self, feedback_fn):
15887 """Modifies the network.
15890 #TODO: reserve/release via temporary reservation manager
15891 # extend cfg.ReserveIp/ReleaseIp with the external flag
15892 if self.op.gateway:
15893 if self.gateway == self.network.gateway:
15894 self.LogWarning("Gateway is already %s", self.gateway)
15895 else:
15896 if self.gateway:
15897 self.pool.Reserve(self.gateway, external=True)
15898 if self.network.gateway:
15899 self.pool.Release(self.network.gateway, external=True)
15900 self.network.gateway = self.gateway
15902 if self.op.add_reserved_ips:
15903 for ip in self.op.add_reserved_ips:
15904 try:
15905 if self.pool.IsReserved(ip):
15906 self.LogWarning("IP address %s is already reserved", ip)
15907 else:
15908 self.pool.Reserve(ip, external=True)
15909 except errors.AddressPoolError, err:
15910 self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
15912 if self.op.remove_reserved_ips:
15913 for ip in self.op.remove_reserved_ips:
15914 if ip == self.network.gateway:
15915 self.LogWarning("Cannot unreserve Gateway's IP")
15916 continue
15917 try:
15918 if not self.pool.IsReserved(ip):
15919 self.LogWarning("IP address %s is already unreserved", ip)
15920 else:
15921 self.pool.Release(ip, external=True)
15922 except errors.AddressPoolError, err:
15923 self.LogWarning("Cannot release IP address %s: %s", ip, err)
15925 if self.op.mac_prefix:
15926 self.network.mac_prefix = self.mac_prefix
15928 if self.op.network6:
15929 self.network.network6 = self.network6
15931 if self.op.gateway6:
15932 self.network.gateway6 = self.gateway6
15934 if self.op.network_type:
15935 self.network.network_type = self.network_type
15937 self.pool.Validate()
15939 self.cfg.Update(self.network, feedback_fn)
15942 class _NetworkQuery(_QueryBase):
15943 FIELDS = query.NETWORK_FIELDS
15945 def ExpandNames(self, lu):
15946 lu.needed_locks = {}
15948 self._all_networks = lu.cfg.GetAllNetworksInfo()
15949 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
15951 if not self.names:
15952 self.wanted = [name_to_uuid[name]
15953 for name in utils.NiceSort(name_to_uuid.keys())]
15954 else:
15955 # Accept names to be either names or UUIDs.
15956 missing = []
15957 self.wanted = []
15958 all_uuid = frozenset(self._all_networks.keys())
15960 for name in self.names:
15961 if name in all_uuid:
15962 self.wanted.append(name)
15963 elif name in name_to_uuid:
15964 self.wanted.append(name_to_uuid[name])
15965 else:
15966 missing.append(name)
15968 if missing:
15969 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
15970 errors.ECODE_NOENT)
15972 def DeclareLocks(self, lu, level):
15973 pass
15975 def _GetQueryData(self, lu):
15976 """Computes the list of networks and their attributes.
15979 do_instances = query.NETQ_INST in self.requested_data
15980 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
15981 do_stats = query.NETQ_STATS in self.requested_data
15983 network_to_groups = None
15984 network_to_instances = None
15985 stats = None
15987 # For NETQ_GROUP, we need to map network->[groups]
15988 if do_groups:
15989 all_groups = lu.cfg.GetAllNodeGroupsInfo()
15990 network_to_groups = dict((uuid, []) for uuid in self.wanted)
15992 if do_instances:
15993 all_instances = lu.cfg.GetAllInstancesInfo()
15994 all_nodes = lu.cfg.GetAllNodesInfo()
15995 network_to_instances = dict((uuid, []) for uuid in self.wanted)
15997 for group in all_groups.values():
15998 if do_instances:
15999 group_nodes = [node.name for node in all_nodes.values() if
16000 node.group == group.uuid]
16001 group_instances = [instance for instance in all_instances.values()
16002 if instance.primary_node in group_nodes]
16004 for net_uuid in group.networks.keys():
16005 if net_uuid in network_to_groups:
16006 netparams = group.networks[net_uuid]
16007 mode = netparams[constants.NIC_MODE]
16008 link = netparams[constants.NIC_LINK]
16009 info = group.name + "(" + mode + ", " + link + ")"
16010 network_to_groups[net_uuid].append(info)
16012 if do_instances:
16013 for instance in group_instances:
16014 for nic in instance.nics:
16015 if nic.network == self._all_networks[net_uuid].name:
16016 network_to_instances[net_uuid].append(instance.name)
16017 break
16019 if do_stats:
16020 stats = {}
16021 for uuid, net in self._all_networks.items():
16022 if uuid in self.wanted:
16023 pool = network.AddressPool(net)
16024 stats[uuid] = {
16025 "free_count": pool.GetFreeCount(),
16026 "reserved_count": pool.GetReservedCount(),
16027 "map": pool.GetMap(),
16028 "external_reservations":
16029 utils.CommaJoin(pool.GetExternalReservations()),
16030 }
16032 return query.NetworkQueryData([self._all_networks[uuid]
16033 for uuid in self.wanted],
16034 network_to_groups,
16035 network_to_instances,
16036 stats)
16039 class LUNetworkQuery(NoHooksLU):
16040 """Logical unit for querying networks.
16045 def CheckArguments(self):
16046 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16047 self.op.output_fields, False)
16049 def ExpandNames(self):
16050 self.nq.ExpandNames(self)
16052 def Exec(self, feedback_fn):
16053 return self.nq.OldStyleQuery(self)
16056 class LUNetworkConnect(LogicalUnit):
16057 """Connect a network to a nodegroup
16060 HPATH = "network-connect"
16061 HTYPE = constants.HTYPE_NETWORK
16064 def ExpandNames(self):
16065 self.network_name = self.op.network_name
16066 self.group_name = self.op.group_name
16067 self.network_mode = self.op.network_mode
16068 self.network_link = self.op.network_link
16070 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16071 self.network = self.cfg.GetNetwork(self.network_uuid)
16072 if self.network is None:
16073 raise errors.OpPrereqError("Network %s does not exist" %
16074 self.network_name, errors.ECODE_INVAL)
16076 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16077 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16078 if self.group is None:
16079 raise errors.OpPrereqError("Group %s does not exist" %
16080 self.group_name, errors.ECODE_INVAL)
16082 self.share_locks[locking.LEVEL_INSTANCE] = 1
16083 self.needed_locks = {
16084 locking.LEVEL_INSTANCE: [],
16085 locking.LEVEL_NODEGROUP: [self.group_uuid],
16086 }
16088 def DeclareLocks(self, level):
16089 if level == locking.LEVEL_INSTANCE:
16090 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16092 # Lock instances optimistically, needs verification once group lock has
16093 # been acquired
16094 if self.op.conflicts_check:
16095 self.needed_locks[locking.LEVEL_INSTANCE] = \
16096 self.cfg.GetNodeGroupInstances(self.group_uuid)
16097 self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
16099 def BuildHooksEnv(self):
16100 ret = {
16101 "GROUP_NAME": self.group_name,
16102 "GROUP_NETWORK_MODE": self.network_mode,
16103 "GROUP_NETWORK_LINK": self.network_link,
16104 }
16105 ret.update(_BuildNetworkHookEnvByObject(self.network))
16107 return ret
16108 def BuildHooksNodes(self):
16109 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16110 return (nodes, nodes)
16112 def CheckPrereq(self):
16113 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16114 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16116 assert self.group_uuid in owned_groups
16118 # Check if locked instances are still correct
16119 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16121 l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
16122 for i in value)
16124 self.netparams = {
16125 constants.NIC_MODE: self.network_mode,
16126 constants.NIC_LINK: self.network_link,
16127 }
16128 objects.NIC.CheckParameterSyntax(self.netparams)
16130 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16131 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16132 self.connected = False
16133 if self.network_uuid in self.group.networks:
16134 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16135 (self.network_name, self.group.name))
16136 self.connected = True
16137 return
16139 if self.op.conflicts_check:
16140 pool = network.AddressPool(self.network)
16141 conflicting_instances = []
16143 for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
16144 for idx, nic in enumerate(instance.nics):
16145 if pool.Contains(nic.ip):
16146 conflicting_instances.append((instance.name, idx, nic.ip))
16148 if conflicting_instances:
16149 self.LogWarning("Following occurrences use IPs from network %s"
16150 " that is about to be connected to nodegroup %s: %s" %
16151 (self.network_name, self.group.name,
16152 l(conflicting_instances)))
16153 raise errors.OpPrereqError("Conflicting IPs found."
16154 " Please remove/modify"
16155 " corresponding NICs",
16156 errors.ECODE_INVAL)
16158 def Exec(self, feedback_fn):
16159 if self.connected:
16160 return
16162 self.group.networks[self.network_uuid] = self.netparams
16163 self.cfg.Update(self.group, feedback_fn)
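# --- Editor's note: illustrative sketch, not part of the original module. ---
# The conflict check above walks every NIC of the locked instances and
# records those whose address falls inside the network being connected. Toy
# data, names invented; uses the standard "ipaddress" module:
#
#   import ipaddress
#
#   net = ipaddress.ip_network(u"10.0.0.0/24")
#   instance_nics = {"inst1": [u"10.0.0.5", u"192.168.1.7"],
#                    "inst2": [u"172.16.0.2"]}
#   conflicts = [(inst, idx, ip)
#                for inst, ips in instance_nics.items()
#                for idx, ip in enumerate(ips)
#                if ip and ipaddress.ip_address(ip) in net]
#   # conflicts == [("inst1", 0, u"10.0.0.5")]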
16166 class LUNetworkDisconnect(LogicalUnit):
16167 """Disconnect a network from a nodegroup
16170 HPATH = "network-disconnect"
16171 HTYPE = constants.HTYPE_NETWORK
16174 def ExpandNames(self):
16175 self.network_name = self.op.network_name
16176 self.group_name = self.op.group_name
16178 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16179 self.network = self.cfg.GetNetwork(self.network_uuid)
16180 if self.network is None:
16181 raise errors.OpPrereqError("Network %s does not exist" %
16182 self.network_name, errors.ECODE_INVAL)
16184 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16185 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16186 if self.group is None:
16187 raise errors.OpPrereqError("Group %s does not exist" %
16188 self.group_name, errors.ECODE_INVAL)
16190 self.needed_locks = {
16191 locking.LEVEL_NODEGROUP: [self.group_uuid],
16192 }
16193 self.share_locks[locking.LEVEL_INSTANCE] = 1
16195 def DeclareLocks(self, level):
16196 if level == locking.LEVEL_INSTANCE:
16197 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16199 # Lock instances optimistically, needs verification once group lock has
16200 # been acquired
16201 if self.op.conflicts_check:
16202 self.needed_locks[locking.LEVEL_INSTANCE] = \
16203 self.cfg.GetNodeGroupInstances(self.group_uuid)
16205 def BuildHooksEnv(self):
16206 ret = {
16207 "GROUP_NAME": self.group_name,
16208 }
16209 ret.update(_BuildNetworkHookEnvByObject(self.network))
16211 return ret
16212 def BuildHooksNodes(self):
16213 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16214 return (nodes, nodes)
16216 def CheckPrereq(self):
16217 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16218 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16220 assert self.group_uuid in owned_groups
16222 # Check if locked instances are still correct
16223 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16225 l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
16226 for i in value)
16228 self.connected = True
16229 if self.network_uuid not in self.group.networks:
16230 self.LogWarning("Network '%s' is not mapped to group '%s'",
16231 self.network_name, self.group.name)
16232 self.connected = False
16233 return
16235 if self.op.conflicts_check:
16236 conflicting_instances = []
16238 for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
16239 for idx, nic in enumerate(instance.nics):
16240 if nic.network == self.network_name:
16241 conflicting_instances.append((instance.name, idx, nic.ip))
16243 if conflicting_instances:
16244 self.LogWarning("Following occurrences use IPs from network %s"
16245 " that is about to be disconnected from the nodegroup"
16246 " %s: %s" %
16247 (self.network_name, self.group.name,
16248 l(conflicting_instances)))
16249 raise errors.OpPrereqError("Conflicting IPs."
16250 " Please remove/modify"
16251 " corresponding NICS",
16252 errors.ECODE_INVAL)
16254 def Exec(self, feedback_fn):
16255 if not self.connected:
16256 return
16258 del self.group.networks[self.network_uuid]
16259 self.cfg.Update(self.group, feedback_fn)
16262 #: Query type implementations
16263 _QUERY_IMPL = {
16264 constants.QR_CLUSTER: _ClusterQuery,
16265 constants.QR_INSTANCE: _InstanceQuery,
16266 constants.QR_NODE: _NodeQuery,
16267 constants.QR_GROUP: _GroupQuery,
16268 constants.QR_NETWORK: _NetworkQuery,
16269 constants.QR_OS: _OsQuery,
16270 constants.QR_EXPORT: _ExportQuery,
16271 }
16273 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16276 def _GetQueryImplementation(name):
16277 """Returns the implementation for a query type.
16279 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16281 """
16282 try:
16283 return _QUERY_IMPL[name]
16284 except KeyError:
16285 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16286 errors.ECODE_INVAL)
16289 def _CheckForConflictingIp(lu, ip, node):
16290 """In case of a conflicting IP address, raise an error.
16292 @type ip: string
16293 @param ip: IP address
16294 @type node: string
16295 @param node: node name
16297 """
16298 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16299 if conf_net is not None:
16300 raise errors.OpPrereqError("Conflicting IP found:"
16301 " %s <> %s." % (ip, conf_net),
16302 errors.ECODE_INVAL)
16304 return (None, None)