# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module


import copy
import itertools
import logging

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti import vcluster
from ganeti import network
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611


INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs


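# Example (illustrative sketch, not from the original module): an LU's Exec
# method can hand follow-up jobs back to the processor instead of returning a
# plain value; the processor submits them and reports their job IDs:
#
#   def Exec(self, feedback_fn):
#     ...
#     return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]])

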
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner

    # Dictionaries used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)

    self.add_locks = {}
    self.remove_locks = {}

    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}

    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
        waits)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hooks results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

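  # Example (illustrative sketch): a typical instance LU combines
  # _ExpandAndLockInstance with _LockInstancesNodes below, declaring node
  # locks lazily:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()
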
  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase(object):
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  #: Field to sort by
  SORT_FIELD = "name"

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield=self.SORT_FIELD)
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @returns: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    return locking.ALL_SET
  else:
    return names[:]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


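# Example (illustrative sketch) of how _GetUpdatedParams merges updates,
# treating constants.VALUE_DEFAULT as "reset to default" (i.e. drop the key);
# the parameter names are purely illustrative:
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "initrd_path": "/boot/rd"}
#   _GetUpdatedParams(old, upd)
#   # -> {"root_path": "/dev/sda1", "initrd_path": "/boot/rd"}

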
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      elif key in constants.IPOLICY_PARAMETERS:
        # FIXME: we assume all such values are float
        try:
          ipolicy[key] = float(value)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid value for attribute"
                                     " '%s': '%s', error: %s" %
                                     (key, value, err), errors.ECODE_INVAL)
      else:
        # FIXME: we assume all others are lists; this should be redone
        # in a nicer way
        ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


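# Example (illustrative sketch, with hypothetical sub-dict keys): merging
# per-key sub-dicts while enforcing value types via ForceDictType:
#
#   base    = {"xen-pvm": {"mem_node": 1024}}
#   updates = {"kvm": {"mem_node": 2048}}
#   _UpdateAndVerifySubDict(base, updates, {"mem_node": constants.VTYPE_INT})
#   # -> {"xen-pvm": {"mem_node": 1024}, "kvm": {"mem_node": 2048}}

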
def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"


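# Example (illustrative sketch): after narrowing down which nodes an LU
# actually works on, it can drop the node locks it no longer needs:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.instance.primary_node])

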
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name
    as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


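# Note (illustrative): the resulting mapping looks like
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com", ...}
# which makes it cheap to answer "which instance owns this LV on this node?".

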
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s",
                  node_name, err)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = static
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")


def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None


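# Example (illustrative sketch): checking a 512 MB memory value against a
# policy allowing 1024-4096 MB flags the value:
#
#   policy = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 1024},
#             constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096}}
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, policy, 512)
#   # -> error string saying 512 is not in range [1024, 4096]

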
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))


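# Example (illustrative sketch): validating a hypothetical 2-disk instance
# shape directly against a policy; an empty result means no violations:
#
#   violations = _ComputeIPolicySpecViolation(
#     ipolicy, mem_size=1024, cpu_count=2, disk_count=2,
#     nic_count=1, disk_sizes=[10240, 20480], spindle_use=2)
#   if violations:
#     raise errors.OpPrereqError(utils.CommaJoin(violations),
#                                errors.ECODE_INVAL)

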
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
      did not violate the old one

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


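# Example (illustrative sketch): expanding a short name given by the user into
# the canonical fully-qualified name stored in the configuration:
#
#   _ExpandInstanceName(self.cfg, "instance1")
#   # -> "instance1.example.com", or OpPrereqError if unknown

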
def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         network_type, mac_prefix, tags):
  """Builds network related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the network
  @type subnet: string
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type network_type: string
  @param network_type: the type of the network
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @type tags: list
  @param tags: the tags of the network

  """
  env = {}
  if name:
    env["NETWORK_NAME"] = name
  if subnet:
    env["NETWORK_SUBNET"] = subnet
  if gateway:
    env["NETWORK_GATEWAY"] = gateway
  if network6:
    env["NETWORK_SUBNET6"] = network6
  if gateway6:
    env["NETWORK_GATEWAY6"] = gateway6
  if mac_prefix:
    env["NETWORK_MAC_PREFIX"] = mac_prefix
  if network_type:
    env["NETWORK_TYPE"] = network_type
  if tags:
    env["NETWORK_TAGS"] = " ".join(tags)

  return env


def _BuildNetworkHookEnvByObject(net):
  """Builds network related env variables for hooks

  @type net: L{objects.Network}
  @param net: the network object

  """
  args = {
    "name": net.name,
    "subnet": net.network,
    "gateway": net.gateway,
    "network6": net.network6,
    "gateway6": net.gateway6,
    "network_type": net.network_type,
    "mac_prefix": net.mac_prefix,
    "tags": net.tags,
  }

  return _BuildNetworkHookEnv(**args) # pylint: disable=W0142


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link, network) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if net:
        env["INSTANCE_NIC%d_NETWORK" % idx] = net
      if netinfo:
        nobj = objects.Network.FromDict(netinfo)
        if nobj.network:
          env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
        if nobj.gateway:
          env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
        if nobj.network6:
          env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
        if nobj.gateway6:
          env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
        if nobj.mac_prefix:
          env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
        if nobj.network_type:
          env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
        if nobj.tags:
          env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICToTuple(lu, nic):
  """Build a tuple of nic information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  """
  ip = nic.ip
  mac = nic.mac
  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  net = nic.network
  netinfo = None
  if net:
    net_uuid = lu.cfg.LookupNetwork(net)
    if net_uuid:
      nobj = lu.cfg.GetNetwork(net_uuid)
      netinfo = objects.Network.ToDict(nobj)
  return (ip, mac, mode, link, net, netinfo)


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  for nic in nics:
    hooks_nics.append(_NICToTuple(lu, nic))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate a given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not ialloc:
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

  if not ialloc:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return ialloc


def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  """
  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
  return hostname


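# Example (illustrative sketch): given a host whose FQDN is
# "node1.example.com", _CheckHostnameSane(lu, "node1") succeeds (prefix
# match), while a name that resolves to a completely different hostname
# raises OpPrereqError.

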
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
    }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
    }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


1934 def _GetAllHypervisorParameters(cluster, instances):
1935 """Compute the set of all hypervisor parameters.
1937 @type cluster: L{objects.Cluster}
1938 @param cluster: the cluster object
1939 @param instances: list of L{objects.Instance}
1940 @param instances: additional instances from which to obtain parameters
1941 @rtype: list of (origin, hypervisor, parameters)
1942 @return: a list with all parameters found, indicating the hypervisor they
1943 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1948 for hv_name in cluster.enabled_hypervisors:
1949 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1951 for os_name, os_hvp in cluster.os_hvp.items():
1952 for hv_name, hv_params in os_hvp.items():
1954 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1955 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1957 # TODO: collapse identical parameter values in a single one
1958 for instance in instances:
1959 if instance.hvparams:
1960 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1961 cluster.FillHV(instance)))
1963 return hvp_data
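# Illustrative sketch, not part of the original module: shape of the
# list built above for a cluster with one enabled hypervisor, one OS
# override and one instance (all names hypothetical):
#
#   [("cluster", "xen-pvm", {...cluster-level defaults...}),
#    ("os debian-image", "xen-pvm", {...defaults plus OS overrides...}),
#    ("instance inst1.example.com", "xen-pvm", {...fully filled...})]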
1966 class _VerifyErrors(object):
1967 """Mix-in for cluster/group verify LUs.
1969 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1970 self.op and self._feedback_fn to be available.)
1972 """
1974 ETYPE_FIELD = "code"
1975 ETYPE_ERROR = "ERROR"
1976 ETYPE_WARNING = "WARNING"
1978 def _Error(self, ecode, item, msg, *args, **kwargs):
1979 """Format an error message.
1981 Based on the opcode's error_codes parameter, either format a
1982 parseable error code, or a simpler error string.
1984 This must be called only from Exec and functions called from Exec.
1986 """
1987 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1988 itype, etxt, _ = ecode
1989 # first complete the msg
1990 if args:
1991 msg = msg % args
1992 # then format the whole message
1993 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1994 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1995 else:
1996 if item is not None:
1997 item = " " + str(item)
1998 else:
1999 item = ""
2000 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2001 # and finally report it via the feedback_fn
2002 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
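# Illustrative sketch, not part of the original module: the two output
# shapes produced above for a hypothetical node error. With the opcode's
# error_codes enabled, a machine-parseable line is emitted:
#   ERROR:ENODEHOOKS:node:node1.example.com:Communication failure
# otherwise the human-oriented form is used:
#   ERROR: node node1.example.com: Communication failure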
2004 def _ErrorIf(self, cond, ecode, *args, **kwargs):
2005 """Log an error message if the passed condition is True.
2007 """
2008 cond = (bool(cond)
2009 or self.op.debug_simulate_errors) # pylint: disable=E1101
2011 # If the error code is in the list of ignored errors, demote the error
2012 # to a warning
2013 (_, etxt, _) = ecode
2014 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2015 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
2017 if cond:
2018 self._Error(ecode, *args, **kwargs)
2020 # do not mark the operation as failed for WARN cases only
2021 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
2022 self.bad = self.bad or cond
2025 class LUClusterVerify(NoHooksLU):
2026 """Submits all jobs necessary to verify the cluster.
2028 """
2029 REQ_BGL = False
2031 def ExpandNames(self):
2032 self.needed_locks = {}
2034 def Exec(self, feedback_fn):
2035 jobs = []
2037 if self.op.group_name:
2038 groups = [self.op.group_name]
2039 depends_fn = lambda: None
2040 else:
2041 groups = self.cfg.GetNodeGroupList()
2043 # Verify global configuration
2044 jobs.append([
2045 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2046 ])
2048 # Always depend on global verification
2049 depends_fn = lambda: [(-len(jobs), [])]
2051 jobs.extend(
2052 [opcodes.OpClusterVerifyGroup(group_name=group,
2053 ignore_errors=self.op.ignore_errors,
2054 depends=depends_fn())]
2055 for group in groups)
2057 # Fix up all parameters
2058 for op in itertools.chain(*jobs): # pylint: disable=W0142
2059 op.debug_simulate_errors = self.op.debug_simulate_errors
2060 op.verbose = self.op.verbose
2061 op.error_codes = self.op.error_codes
2062 try:
2063 op.skip_checks = self.op.skip_checks
2064 except AttributeError:
2065 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2067 return ResultWithJobs(jobs)
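# Illustrative sketch, not part of the original module: with no
# group_name restriction and the hypothetical groups "default" and
# "rack2", the job list submitted above is roughly
#   [[OpClusterVerifyConfig(...)],
#    [OpClusterVerifyGroup(group_name="default", depends=[(-1, [])], ...)],
#    [OpClusterVerifyGroup(group_name="rack2", depends=[(-1, [])], ...)]]
# where the relative dependency (-1, []) makes every group job wait for
# the preceding configuration-verification job.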
2070 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2071 """Verifies the cluster config.
2073 """
2074 REQ_BGL = False
2076 def _VerifyHVP(self, hvp_data):
2077 """Verifies locally the syntax of the hypervisor parameters.
2079 """
2080 for item, hv_name, hv_params in hvp_data:
2081 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2082 (hv_name, item))
2083 try:
2084 hv_class = hypervisor.GetHypervisor(hv_name)
2085 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2086 hv_class.CheckParameterSyntax(hv_params)
2087 except errors.GenericError, err:
2088 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2090 def ExpandNames(self):
2091 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2092 self.share_locks = _ShareAll()
2094 def CheckPrereq(self):
2095 """Check prerequisites.
2097 """
2098 # Retrieve all information
2099 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2100 self.all_node_info = self.cfg.GetAllNodesInfo()
2101 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2103 def Exec(self, feedback_fn):
2104 """Verify integrity of cluster, performing various tests on nodes.
2106 """
2107 self.bad = False
2108 self._feedback_fn = feedback_fn
2110 feedback_fn("* Verifying cluster config")
2112 for msg in self.cfg.VerifyConfig():
2113 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2115 feedback_fn("* Verifying cluster certificate files")
2117 for cert_filename in pathutils.ALL_CERT_FILES:
2118 (errcode, msg) = _VerifyCertificate(cert_filename)
2119 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2121 feedback_fn("* Verifying hypervisor parameters")
2123 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2124 self.all_inst_info.values()))
2126 feedback_fn("* Verifying all nodes belong to an existing group")
2128 # We do this verification here because, should this bogus circumstance
2129 # occur, it would never be caught by VerifyGroup, which only acts on
2130 # nodes/instances reachable from existing node groups.
2132 dangling_nodes = set(node.name for node in self.all_node_info.values()
2133 if node.group not in self.all_group_info)
2135 dangling_instances = {}
2136 no_node_instances = []
2138 for inst in self.all_inst_info.values():
2139 if inst.primary_node in dangling_nodes:
2140 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2141 elif inst.primary_node not in self.all_node_info:
2142 no_node_instances.append(inst.name)
2144 pretty_dangling = [
2145 "%s (%s)" %
2146 (node.name,
2147 utils.CommaJoin(dangling_instances.get(node.name,
2148 ["no instances"])))
2149 for node in dangling_nodes]
2151 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2153 "the following nodes (and their instances) belong to a non"
2154 " existing group: %s", utils.CommaJoin(pretty_dangling))
2156 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2158 "the following instances have a non-existing primary-node:"
2159 " %s", utils.CommaJoin(no_node_instances))
2161 return not self.bad
2164 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2165 """Verifies the status of a node group.
2167 """
2168 HPATH = "cluster-verify"
2169 HTYPE = constants.HTYPE_CLUSTER
2170 REQ_BGL = False
2172 _HOOKS_INDENT_RE = re.compile("^", re.M)
2174 class NodeImage(object):
2175 """A class representing the logical and physical status of a node.
2178 @ivar name: the node name to which this object refers
2179 @ivar volumes: a structure as returned from
2180 L{ganeti.backend.GetVolumeList} (runtime)
2181 @ivar instances: a list of running instances (runtime)
2182 @ivar pinst: list of configured primary instances (config)
2183 @ivar sinst: list of configured secondary instances (config)
2184 @ivar sbp: dictionary of {primary-node: list of instances} for all
2185 instances for which this node is secondary (config)
2186 @ivar mfree: free memory, as reported by hypervisor (runtime)
2187 @ivar dfree: free disk, as reported by the node (runtime)
2188 @ivar offline: the offline status (config)
2189 @type rpc_fail: boolean
2190 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2191 not whether the individual keys were correct) (runtime)
2192 @type lvm_fail: boolean
2193 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2194 @type hyp_fail: boolean
2195 @ivar hyp_fail: whether the RPC call didn't return the instance list
2196 @type ghost: boolean
2197 @ivar ghost: whether this is a known node or not (config)
2198 @type os_fail: boolean
2199 @ivar os_fail: whether the RPC call didn't return valid OS data
2201 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2202 @type vm_capable: boolean
2203 @ivar vm_capable: whether the node can host instances
2205 """
2206 def __init__(self, offline=False, name=None, vm_capable=True):
2207 self.name = name
2208 self.volumes = {}
2209 self.instances = []
2210 self.pinst = []
2211 self.sinst = []
2212 self.sbp = {}
2213 self.mfree = 0
2214 self.dfree = 0
2215 self.offline = offline
2216 self.vm_capable = vm_capable
2217 self.rpc_fail = False
2218 self.lvm_fail = False
2219 self.hyp_fail = False
2220 self.ghost = False
2221 self.os_fail = False
2222 self.oslist = {}
2224 def ExpandNames(self):
2225 # This raises errors.OpPrereqError on its own:
2226 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2228 # Get instances in node group; this is unsafe and needs verification later
2229 inst_names = \
2230 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2232 self.needed_locks = {
2233 locking.LEVEL_INSTANCE: inst_names,
2234 locking.LEVEL_NODEGROUP: [self.group_uuid],
2235 locking.LEVEL_NODE: [],
2237 # This opcode is run by watcher every five minutes and acquires all nodes
2238 # for a group. It doesn't run for a long time, so it's better to acquire
2239 # the node allocation lock as well.
2240 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2241 }
2243 self.share_locks = _ShareAll()
2245 def DeclareLocks(self, level):
2246 if level == locking.LEVEL_NODE:
2247 # Get members of node group; this is unsafe and needs verification later
2248 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2250 all_inst_info = self.cfg.GetAllInstancesInfo()
2252 # In Exec(), we warn about mirrored instances that have primary and
2253 # secondary living in separate node groups. To fully verify that
2254 # volumes for these instances are healthy, we will need to do an
2255 # extra call to their secondaries. We ensure here those nodes will
2256 # be locked.
2257 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2258 # Important: access only the instances whose lock is owned
2259 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2260 nodes.update(all_inst_info[inst].secondary_nodes)
2262 self.needed_locks[locking.LEVEL_NODE] = nodes
2264 def CheckPrereq(self):
2265 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2266 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2268 group_nodes = set(self.group_info.members)
2269 group_instances = \
2270 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2272 unlocked_nodes = \
2273 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2275 unlocked_instances = \
2276 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2278 if unlocked_nodes:
2279 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2280 utils.CommaJoin(unlocked_nodes),
2281 errors.ECODE_STATE)
2283 if unlocked_instances:
2284 raise errors.OpPrereqError("Missing lock for instances: %s" %
2285 utils.CommaJoin(unlocked_instances),
2286 errors.ECODE_STATE)
2288 self.all_node_info = self.cfg.GetAllNodesInfo()
2289 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2291 self.my_node_names = utils.NiceSort(group_nodes)
2292 self.my_inst_names = utils.NiceSort(group_instances)
2294 self.my_node_info = dict((name, self.all_node_info[name])
2295 for name in self.my_node_names)
2297 self.my_inst_info = dict((name, self.all_inst_info[name])
2298 for name in self.my_inst_names)
2300 # We detect here the nodes that will need the extra RPC calls for verifying
2301 # split LV volumes; they should be locked.
2302 extra_lv_nodes = set()
2304 for inst in self.my_inst_info.values():
2305 if inst.disk_template in constants.DTS_INT_MIRROR:
2306 for nname in inst.all_nodes:
2307 if self.all_node_info[nname].group != self.group_uuid:
2308 extra_lv_nodes.add(nname)
2310 unlocked_lv_nodes = \
2311 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2313 if unlocked_lv_nodes:
2314 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2315 utils.CommaJoin(unlocked_lv_nodes),
2316 errors.ECODE_STATE)
2317 self.extra_lv_nodes = list(extra_lv_nodes)
2319 def _VerifyNode(self, ninfo, nresult):
2320 """Perform some basic validation on data returned from a node.
2322 - check the result data structure is well formed and has all the
2323 mandatory fields
2324 - check ganeti version
2326 @type ninfo: L{objects.Node}
2327 @param ninfo: the node to check
2328 @param nresult: the results from the node
2329 @rtype: boolean
2330 @return: whether overall this call was successful (and we can expect
2331 reasonable values in the response)
2333 """
2334 node = ninfo.name
2335 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2337 # main result, nresult should be a non-empty dict
2338 test = not nresult or not isinstance(nresult, dict)
2339 _ErrorIf(test, constants.CV_ENODERPC, node,
2340 "unable to verify node: no data returned")
2341 if test:
2342 return False
2344 # compares ganeti version
2345 local_version = constants.PROTOCOL_VERSION
2346 remote_version = nresult.get("version", None)
2347 test = not (remote_version and
2348 isinstance(remote_version, (list, tuple)) and
2349 len(remote_version) == 2)
2350 _ErrorIf(test, constants.CV_ENODERPC, node,
2351 "connection to node returned invalid data")
2352 if test:
2353 return False
2355 test = local_version != remote_version[0]
2356 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2357 "incompatible protocol versions: master %s,"
2358 " node %s", local_version, remote_version[0])
2359 if test:
2360 return False
2362 # node seems compatible, we can actually try to look into its results
2364 # full package version
2365 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2366 constants.CV_ENODEVERSION, node,
2367 "software version mismatch: master %s, node %s",
2368 constants.RELEASE_VERSION, remote_version[1],
2369 code=self.ETYPE_WARNING)
2371 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2372 if ninfo.vm_capable and isinstance(hyp_result, dict):
2373 for hv_name, hv_result in hyp_result.iteritems():
2374 test = hv_result is not None
2375 _ErrorIf(test, constants.CV_ENODEHV, node,
2376 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2378 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2379 if ninfo.vm_capable and isinstance(hvp_result, list):
2380 for item, hv_name, hv_result in hvp_result:
2381 _ErrorIf(True, constants.CV_ENODEHV, node,
2382 "hypervisor %s parameter verify failure (source %s): %s",
2383 hv_name, item, hv_result)
2385 test = nresult.get(constants.NV_NODESETUP,
2386 ["Missing NODESETUP results"])
2387 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2388 "; ".join(test))
2390 return True
2392 def _VerifyNodeTime(self, ninfo, nresult,
2393 nvinfo_starttime, nvinfo_endtime):
2394 """Check the node time.
2396 @type ninfo: L{objects.Node}
2397 @param ninfo: the node to check
2398 @param nresult: the remote results for the node
2399 @param nvinfo_starttime: the start time of the RPC call
2400 @param nvinfo_endtime: the end time of the RPC call
2402 """
2403 node = ninfo.name
2404 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2406 ntime = nresult.get(constants.NV_TIME, None)
2407 try:
2408 ntime_merged = utils.MergeTime(ntime)
2409 except (ValueError, TypeError):
2410 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2411 return
2413 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2414 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2415 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2416 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2417 else:
2418 ntime_diff = None
2420 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2421 "Node time diverges by at least %s from master node time",
2422 ntime_diff)
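# Illustrative sketch, not part of the original module: assuming
# NODE_MAX_CLOCK_SKEW were 150 seconds and the verify RPC ran in the
# window [1000.0, 1002.0], a node reporting a merged time of 1200.0
# would yield ntime_diff = "%.01fs" % abs(1200.0 - 1002.0) == "198.0s"
# and raise CV_ENODETIME, while a report of 1100.0 would pass.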
2424 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2425 """Check the node LVM results.
2427 @type ninfo: L{objects.Node}
2428 @param ninfo: the node to check
2429 @param nresult: the remote results for the node
2430 @param vg_name: the configured VG name
2432 """
2433 if vg_name is None:
2434 return
2436 node = ninfo.name
2437 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2439 # checks vg existence and size > 20G
2440 vglist = nresult.get(constants.NV_VGLIST, None)
2441 test = vglist is None
2442 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2443 if not test:
2444 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2445 constants.MIN_VG_SIZE)
2446 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2448 # check pv names
2449 pvlist = nresult.get(constants.NV_PVLIST, None)
2450 test = pvlist is None
2451 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2452 if not test:
2453 # check that ':' is not present in PV names, since it's a
2454 # special character for lvcreate (denotes the range of PEs to
2455 # use on the PV)
2456 for _, pvname, owner_vg in pvlist:
2457 test = ":" in pvname
2458 _ErrorIf(test, constants.CV_ENODELVM, node,
2459 "Invalid character ':' in PV '%s' of VG '%s'",
2460 pvname, owner_vg)
2462 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2463 """Check the node bridges.
2465 @type ninfo: L{objects.Node}
2466 @param ninfo: the node to check
2467 @param nresult: the remote results for the node
2468 @param bridges: the expected list of bridges
2470 """
2471 if not bridges:
2472 return
2474 node = ninfo.name
2475 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2477 missing = nresult.get(constants.NV_BRIDGES, None)
2478 test = not isinstance(missing, list)
2479 _ErrorIf(test, constants.CV_ENODENET, node,
2480 "did not return valid bridge information")
2481 if not test:
2482 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2483 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2485 def _VerifyNodeUserScripts(self, ninfo, nresult):
2486 """Check the results of user scripts presence and executability on the node
2488 @type ninfo: L{objects.Node}
2489 @param ninfo: the node to check
2490 @param nresult: the remote results for the node
2492 """
2493 node = ninfo.name
2495 test = not constants.NV_USERSCRIPTS in nresult
2496 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2497 "did not return user scripts information")
2499 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2500 if broken_scripts:
2501 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2502 "user scripts not present or not executable: %s" %
2503 utils.CommaJoin(sorted(broken_scripts)))
2505 def _VerifyNodeNetwork(self, ninfo, nresult):
2506 """Check the node network connectivity results.
2508 @type ninfo: L{objects.Node}
2509 @param ninfo: the node to check
2510 @param nresult: the remote results for the node
2512 """
2513 node = ninfo.name
2514 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2516 test = constants.NV_NODELIST not in nresult
2517 _ErrorIf(test, constants.CV_ENODESSH, node,
2518 "node hasn't returned node ssh connectivity data")
2519 if not test:
2520 if nresult[constants.NV_NODELIST]:
2521 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2522 _ErrorIf(True, constants.CV_ENODESSH, node,
2523 "ssh communication with node '%s': %s", a_node, a_msg)
2525 test = constants.NV_NODENETTEST not in nresult
2526 _ErrorIf(test, constants.CV_ENODENET, node,
2527 "node hasn't returned node tcp connectivity data")
2528 if not test:
2529 if nresult[constants.NV_NODENETTEST]:
2530 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2531 for anode in nlist:
2532 _ErrorIf(True, constants.CV_ENODENET, node,
2533 "tcp communication with node '%s': %s",
2534 anode, nresult[constants.NV_NODENETTEST][anode])
2536 test = constants.NV_MASTERIP not in nresult
2537 _ErrorIf(test, constants.CV_ENODENET, node,
2538 "node hasn't returned node master IP reachability data")
2539 if not test:
2540 if not nresult[constants.NV_MASTERIP]:
2541 if node == self.master_node:
2542 msg = "the master node cannot reach the master IP (not configured?)"
2543 else:
2544 msg = "cannot reach the master IP"
2545 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2547 def _VerifyInstance(self, instance, instanceconfig, node_image,
2548 diskstatus):
2549 """Verify an instance.
2551 This function checks to see if the required block devices are
2552 available on the instance's node.
2554 """
2555 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2556 node_current = instanceconfig.primary_node
2558 node_vol_should = {}
2559 instanceconfig.MapLVsByNode(node_vol_should)
2561 cluster = self.cfg.GetClusterInfo()
2562 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2563 self.group_info)
2564 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2565 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2566 code=self.ETYPE_WARNING)
2568 for node in node_vol_should:
2569 n_img = node_image[node]
2570 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2571 # ignore missing volumes on offline or broken nodes
2572 continue
2573 for volume in node_vol_should[node]:
2574 test = volume not in n_img.volumes
2575 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2576 "volume %s missing on node %s", volume, node)
2578 if instanceconfig.admin_state == constants.ADMINST_UP:
2579 pri_img = node_image[node_current]
2580 test = instance not in pri_img.instances and not pri_img.offline
2581 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2582 "instance not running on its primary node %s",
2583 node_current)
2585 diskdata = [(nname, success, status, idx)
2586 for (nname, disks) in diskstatus.items()
2587 for idx, (success, status) in enumerate(disks)]
2589 for nname, success, bdev_status, idx in diskdata:
2590 # the 'ghost node' construction in Exec() ensures that we have a
2591 # node image entry for each node referenced by the instance
2592 snode = node_image[nname]
2593 bad_snode = snode.ghost or snode.offline
2594 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2595 not success and not bad_snode,
2596 constants.CV_EINSTANCEFAULTYDISK, instance,
2597 "couldn't retrieve status for disk/%s on %s: %s",
2598 idx, nname, bdev_status)
2599 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2600 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2601 constants.CV_EINSTANCEFAULTYDISK, instance,
2602 "disk/%s on %s is faulty", idx, nname)
2604 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2605 """Verify if there are any unknown volumes in the cluster.
2607 The .os, .swap and backup volumes are ignored. All other volumes are
2608 reported as unknown.
2610 @type reserved: L{ganeti.utils.FieldSet}
2611 @param reserved: a FieldSet of reserved volume names
2613 """
2614 for node, n_img in node_image.items():
2615 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2616 self.all_node_info[node].group != self.group_uuid):
2617 # skip non-healthy nodes
2618 continue
2619 for volume in n_img.volumes:
2620 test = ((node not in node_vol_should or
2621 volume not in node_vol_should[node]) and
2622 not reserved.Matches(volume))
2623 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2624 "volume %s is unknown", volume)
2626 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2627 """Verify N+1 Memory Resilience.
2629 Check that if one single node dies we can still start all the
2630 instances it was primary for.
2632 """
2633 cluster_info = self.cfg.GetClusterInfo()
2634 for node, n_img in node_image.items():
2635 # This code checks that every node which is now listed as
2636 # secondary has enough memory to host all instances it is
2637 # supposed to should a single other node in the cluster fail.
2638 # FIXME: not ready for failover to an arbitrary node
2639 # FIXME: does not support file-backed instances
2640 # WARNING: we currently take into account down instances as well
2641 # as up ones, considering that even if they're down someone
2642 # might want to start them even in the event of a node failure.
2643 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2644 # we're skipping nodes marked offline and nodes in other groups from
2645 # the N+1 warning, since most likely we don't have good memory
2646 # information from them; we already list instances living on such
2647 # nodes, and that's enough warning
2648 continue
2649 # TODO(dynmem): also consider ballooning out other instances
2650 for prinode, instances in n_img.sbp.items():
2651 needed_mem = 0
2652 for instance in instances:
2653 bep = cluster_info.FillBE(instance_cfg[instance])
2654 if bep[constants.BE_AUTO_BALANCE]:
2655 needed_mem += bep[constants.BE_MINMEM]
2656 test = n_img.mfree < needed_mem
2657 self._ErrorIf(test, constants.CV_ENODEN1, node,
2658 "not enough memory to accommodate instance failovers"
2659 " should node %s fail (%dMiB needed, %dMiB available)",
2660 prinode, needed_mem, n_img.mfree)
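# Illustrative sketch, not part of the original module: suppose node B
# is secondary for two auto-balanced instances whose primary is node A,
# with BE_MINMEM values of 1024 and 2048 MiB. Should A fail, B needs
# 1024 + 2048 = 3072 MiB free; if B's hypervisor reports mfree of only
# 2500 MiB, the CV_ENODEN1 error above fires for the pair (A, B).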
2662 @classmethod
2663 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2664 (files_all, files_opt, files_mc, files_vm)):
2665 """Verifies file checksums collected from all nodes.
2667 @param errorif: Callback for reporting errors
2668 @param nodeinfo: List of L{objects.Node} objects
2669 @param master_node: Name of master node
2670 @param all_nvinfo: RPC results
2672 """
2673 # Define functions determining which nodes to consider for a file
2674 files2nodefn = [
2675 (files_all, None),
2676 (files_mc, lambda node: (node.master_candidate or
2677 node.name == master_node)),
2678 (files_vm, lambda node: node.vm_capable),
2679 ]
2681 # Build mapping from filename to list of nodes which should have the file
2682 nodefiles = {}
2683 for (files, fn) in files2nodefn:
2684 if fn is None:
2685 filenodes = nodeinfo
2686 else:
2687 filenodes = filter(fn, nodeinfo)
2688 nodefiles.update((filename,
2689 frozenset(map(operator.attrgetter("name"), filenodes)))
2690 for filename in files)
2692 assert set(nodefiles) == (files_all | files_mc | files_vm)
2694 fileinfo = dict((filename, {}) for filename in nodefiles)
2695 ignore_nodes = set()
2697 for node in nodeinfo:
2698 if node.offline:
2699 ignore_nodes.add(node.name)
2700 continue
2702 nresult = all_nvinfo[node.name]
2704 if nresult.fail_msg or not nresult.payload:
2705 node_files = None
2706 else:
2707 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2708 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2709 for (key, value) in fingerprints.items())
2712 test = not (node_files and isinstance(node_files, dict))
2713 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2714 "Node did not return file checksum data")
2715 if test:
2716 ignore_nodes.add(node.name)
2717 continue
2719 # Build per-checksum mapping from filename to nodes having it
2720 for (filename, checksum) in node_files.items():
2721 assert filename in nodefiles
2722 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2724 for (filename, checksums) in fileinfo.items():
2725 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2727 # Nodes having the file
2728 with_file = frozenset(node_name
2729 for nodes in fileinfo[filename].values()
2730 for node_name in nodes) - ignore_nodes
2732 expected_nodes = nodefiles[filename] - ignore_nodes
2734 # Nodes missing file
2735 missing_file = expected_nodes - with_file
2737 if filename in files_opt:
2738 # All or no nodes
2739 errorif(missing_file and missing_file != expected_nodes,
2740 constants.CV_ECLUSTERFILECHECK, None,
2741 "File %s is optional, but it must exist on all or no"
2742 " nodes (not found on %s)",
2743 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2744 else:
2745 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2746 "File %s is missing from node(s) %s", filename,
2747 utils.CommaJoin(utils.NiceSort(missing_file)))
2749 # Warn if a node has a file it shouldn't
2750 unexpected = with_file - expected_nodes
2751 errorif(unexpected,
2752 constants.CV_ECLUSTERFILECHECK, None,
2753 "File %s should not exist on node(s) %s",
2754 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2756 # See if there are multiple versions of the file
2757 test = len(checksums) > 1
2758 if test:
2759 variants = ["variant %s on %s" %
2760 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2761 for (idx, (checksum, nodes)) in
2762 enumerate(sorted(checksums.items()))]
2763 else:
2764 variants = []
2766 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2767 "File %s found with %s different checksums (%s)",
2768 filename, len(checksums), "; ".join(variants))
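# Illustrative sketch, not part of the original module: with two nodes
# (hypothetical names) and one tracked file out of sync, fileinfo ends
# up shaped like
#   {"/var/lib/ganeti/config.data":
#      {"0123abcd...": set(["node1", "node2"])},
#    "/var/lib/ganeti/known_hosts":
#      {"4567cdef...": set(["node1"]), "89ab0123...": set(["node2"])}}
# and the second entry is reported as two checksum variants.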
2770 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2771 drbd_map):
2772 """Verifies the node DRBD status.
2774 @type ninfo: L{objects.Node}
2775 @param ninfo: the node to check
2776 @param nresult: the remote results for the node
2777 @param instanceinfo: the dict of instances
2778 @param drbd_helper: the configured DRBD usermode helper
2779 @param drbd_map: the DRBD map as returned by
2780 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2782 """
2783 node = ninfo.name
2784 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2786 if drbd_helper:
2787 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2788 test = (helper_result is None)
2789 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2790 "no drbd usermode helper returned")
2791 if helper_result:
2792 status, payload = helper_result
2793 test = not status
2794 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2795 "drbd usermode helper check unsuccessful: %s", payload)
2796 test = status and (payload != drbd_helper)
2797 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2798 "wrong drbd usermode helper: %s", payload)
2800 # compute the DRBD minors
2801 node_drbd = {}
2802 for minor, instance in drbd_map[node].items():
2803 test = instance not in instanceinfo
2804 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2805 "ghost instance '%s' in temporary DRBD map", instance)
2806 # ghost instance should not be running, but otherwise we
2807 # don't give double warnings (both ghost instance and
2808 # unallocated minor in use)
2809 if test:
2810 node_drbd[minor] = (instance, False)
2811 else:
2812 instance = instanceinfo[instance]
2813 node_drbd[minor] = (instance.name,
2814 instance.admin_state == constants.ADMINST_UP)
2816 # and now check them
2817 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2818 test = not isinstance(used_minors, (tuple, list))
2819 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2820 "cannot parse drbd status file: %s", str(used_minors))
2821 if test:
2822 # we cannot check drbd status
2823 return
2825 for minor, (iname, must_exist) in node_drbd.items():
2826 test = minor not in used_minors and must_exist
2827 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2828 "drbd minor %d of instance %s is not active", minor, iname)
2829 for minor in used_minors:
2830 test = minor not in node_drbd
2831 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2832 "unallocated drbd minor %d is in use", minor)
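# Illustrative sketch, not part of the original module: ComputeDRBDMap()
# returns a {node_name: {minor: instance_name}} structure, e.g.
#   {"node1": {0: "inst1", 1: "inst2"}, "node2": {0: "inst1"}}
# so for node1 the node_drbd mapping built above becomes
#   {0: ("inst1", True), 1: ("inst2", False)}
# when inst1 is ADMINST_UP and inst2 is administratively down.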
2834 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2835 """Builds the node OS structures.
2837 @type ninfo: L{objects.Node}
2838 @param ninfo: the node to check
2839 @param nresult: the remote results for the node
2840 @param nimg: the node image object
2842 """
2843 node = ninfo.name
2844 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2846 remote_os = nresult.get(constants.NV_OSLIST, None)
2847 test = (not isinstance(remote_os, list) or
2848 not compat.all(isinstance(v, list) and len(v) == 7
2849 for v in remote_os))
2851 _ErrorIf(test, constants.CV_ENODEOS, node,
2852 "node hasn't returned valid OS data")
2854 nimg.os_fail = test
2856 if test:
2857 return
2859 os_dict = {}
2861 for (name, os_path, status, diagnose,
2862 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2864 if name not in os_dict:
2865 os_dict[name] = []
2867 # parameters is a list of lists instead of list of tuples due to
2868 # JSON lacking a real tuple type, fix it:
2869 parameters = [tuple(v) for v in parameters]
2870 os_dict[name].append((os_path, status, diagnose,
2871 set(variants), set(parameters), set(api_ver)))
2873 nimg.oslist = os_dict
2875 def _VerifyNodeOS(self, ninfo, nimg, base):
2876 """Verifies the node OS list.
2878 @type ninfo: L{objects.Node}
2879 @param ninfo: the node to check
2880 @param nimg: the node image object
2881 @param base: the 'template' node we match against (e.g. from the master)
2883 """
2884 node = ninfo.name
2885 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2887 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2889 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2890 for os_name, os_data in nimg.oslist.items():
2891 assert os_data, "Empty OS status for OS %s?!" % os_name
2892 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2893 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2894 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2895 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2896 "OS '%s' has multiple entries (first one shadows the rest): %s",
2897 os_name, utils.CommaJoin([v[0] for v in os_data]))
2898 # comparisons with the 'base' image
2899 test = os_name not in base.oslist
2900 _ErrorIf(test, constants.CV_ENODEOS, node,
2901 "Extra OS %s not present on reference node (%s)",
2902 os_name, base.name)
2903 if test:
2904 continue
2905 assert base.oslist[os_name], "Base node has empty OS status?"
2906 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2907 if not b_status:
2908 # base OS is invalid, skipping
2909 continue
2910 for kind, a, b in [("API version", f_api, b_api),
2911 ("variants list", f_var, b_var),
2912 ("parameters", beautify_params(f_param),
2913 beautify_params(b_param))]:
2914 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2915 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2916 kind, os_name, base.name,
2917 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2919 # check any missing OSes
2920 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2921 _ErrorIf(missing, constants.CV_ENODEOS, node,
2922 "OSes present on reference node %s but missing on this node: %s",
2923 base.name, utils.CommaJoin(missing))
2925 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2926 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2928 @type ninfo: L{objects.Node}
2929 @param ninfo: the node to check
2930 @param nresult: the remote results for the node
2931 @type is_master: bool
2932 @param is_master: Whether node is the master node
2934 """
2935 node = ninfo.name
2937 if (is_master and
2938 (constants.ENABLE_FILE_STORAGE or
2939 constants.ENABLE_SHARED_FILE_STORAGE)):
2940 try:
2941 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2942 except KeyError:
2943 # This should never happen
2944 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2945 "Node did not return forbidden file storage paths")
2946 else:
2947 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2948 "Found forbidden file storage paths: %s",
2949 utils.CommaJoin(fspaths))
2950 else:
2951 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2952 constants.CV_ENODEFILESTORAGEPATHS, node,
2953 "Node should not have returned forbidden file storage"
2954 " paths")
2956 def _VerifyOob(self, ninfo, nresult):
2957 """Verifies out of band functionality of a node.
2959 @type ninfo: L{objects.Node}
2960 @param ninfo: the node to check
2961 @param nresult: the remote results for the node
2963 """
2964 node = ninfo.name
2965 # We just have to verify the paths on master and/or master candidates
2966 # as the oob helper is invoked on the master
2967 if ((ninfo.master_candidate or ninfo.master_capable) and
2968 constants.NV_OOB_PATHS in nresult):
2969 for path_result in nresult[constants.NV_OOB_PATHS]:
2970 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2972 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2973 """Verifies and updates the node volume data.
2975 This function will update a L{NodeImage}'s internal structures
2976 with data from the remote call.
2978 @type ninfo: L{objects.Node}
2979 @param ninfo: the node to check
2980 @param nresult: the remote results for the node
2981 @param nimg: the node image object
2982 @param vg_name: the configured VG name
2984 """
2985 node = ninfo.name
2986 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2988 nimg.lvm_fail = True
2989 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2990 if vg_name is None:
2991 pass
2992 elif isinstance(lvdata, basestring):
2993 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2994 utils.SafeEncode(lvdata))
2995 elif not isinstance(lvdata, dict):
2996 _ErrorIf(True, constants.CV_ENODELVM, node,
2997 "rpc call to node failed (lvlist)")
2998 else:
2999 nimg.volumes = lvdata
3000 nimg.lvm_fail = False
3002 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3003 """Verifies and updates the node instance list.
3005 If the listing was successful, then updates this node's instance
3006 list. Otherwise, it marks the RPC call as failed for the instance
3007 list key.
3009 @type ninfo: L{objects.Node}
3010 @param ninfo: the node to check
3011 @param nresult: the remote results for the node
3012 @param nimg: the node image object
3014 """
3015 idata = nresult.get(constants.NV_INSTANCELIST, None)
3016 test = not isinstance(idata, list)
3017 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3018 "rpc call to node failed (instancelist): %s",
3019 utils.SafeEncode(str(idata)))
3020 if test:
3021 nimg.hyp_fail = True
3022 else:
3023 nimg.instances = idata
3025 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3026 """Verifies and computes a node information map
3028 @type ninfo: L{objects.Node}
3029 @param ninfo: the node to check
3030 @param nresult: the remote results for the node
3031 @param nimg: the node image object
3032 @param vg_name: the configured VG name
3034 """
3035 node = ninfo.name
3036 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3038 # try to read free memory (from the hypervisor)
3039 hv_info = nresult.get(constants.NV_HVINFO, None)
3040 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3041 _ErrorIf(test, constants.CV_ENODEHV, node,
3042 "rpc call to node failed (hvinfo)")
3043 if not test:
3044 try:
3045 nimg.mfree = int(hv_info["memory_free"])
3046 except (ValueError, TypeError):
3047 _ErrorIf(True, constants.CV_ENODERPC, node,
3048 "node returned invalid nodeinfo, check hypervisor")
3050 # FIXME: devise a free space model for file based instances as well
3051 if vg_name is not None:
3052 test = (constants.NV_VGLIST not in nresult or
3053 vg_name not in nresult[constants.NV_VGLIST])
3054 _ErrorIf(test, constants.CV_ENODELVM, node,
3055 "node didn't return data for the volume group '%s'"
3056 " - it is either missing or broken", vg_name)
3057 if not test:
3058 try:
3059 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3060 except (ValueError, TypeError):
3061 _ErrorIf(True, constants.CV_ENODERPC, node,
3062 "node returned invalid LVM info, check LVM status")
3064 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3065 """Gets per-disk status information for all instances.
3067 @type nodelist: list of strings
3068 @param nodelist: Node names
3069 @type node_image: dict of (name, L{objects.Node})
3070 @param node_image: Node objects
3071 @type instanceinfo: dict of (name, L{objects.Instance})
3072 @param instanceinfo: Instance objects
3073 @rtype: {instance: {node: [(success, payload)]}}
3074 @return: a dictionary of per-instance dictionaries with nodes as
3075 keys and disk information as values; the disk information is a
3076 list of tuples (success, payload)
3078 """
3079 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3081 node_disks = {}
3082 node_disks_devonly = {}
3083 diskless_instances = set()
3084 diskless = constants.DT_DISKLESS
3086 for nname in nodelist:
3087 node_instances = list(itertools.chain(node_image[nname].pinst,
3088 node_image[nname].sinst))
3089 diskless_instances.update(inst for inst in node_instances
3090 if instanceinfo[inst].disk_template == diskless)
3091 disks = [(inst, disk)
3092 for inst in node_instances
3093 for disk in instanceinfo[inst].disks]
3095 if not disks:
3096 # No need to collect data
3097 continue
3099 node_disks[nname] = disks
3101 # _AnnotateDiskParams already makes copies of the disks
3102 devonly = []
3103 for (inst, dev) in disks:
3104 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3105 self.cfg.SetDiskID(anno_disk, nname)
3106 devonly.append(anno_disk)
3108 node_disks_devonly[nname] = devonly
3110 assert len(node_disks) == len(node_disks_devonly)
3112 # Collect data from all nodes with disks
3113 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3114 node_disks_devonly)
3116 assert len(result) == len(node_disks)
3118 instdisk = {}
3120 for (nname, nres) in result.items():
3121 disks = node_disks[nname]
3123 if nres.offline:
3124 # No data from this node
3125 data = len(disks) * [(False, "node offline")]
3125 data = len(disks) * [(False, "node offline")]
3126 else:
3127 msg = nres.fail_msg
3128 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3129 "while getting disk information: %s", msg)
3130 if msg:
3131 # No data from this node
3132 data = len(disks) * [(False, msg)]
3132 data = len(disks) * [(False, msg)]
3133 else:
3134 data = []
3135 for idx, i in enumerate(nres.payload):
3136 if isinstance(i, (tuple, list)) and len(i) == 2:
3137 data.append(i)
3138 else:
3139 logging.warning("Invalid result from node %s, entry %d: %s",
3140 nname, idx, i)
3141 data.append((False, "Invalid result from the remote node"))
3143 for ((inst, _), status) in zip(disks, data):
3144 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3146 # Add empty entries for diskless instances.
3147 for inst in diskless_instances:
3148 assert inst not in instdisk
3149 instdisk[inst] = {}
3151 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3152 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3153 compat.all(isinstance(s, (tuple, list)) and
3154 len(s) == 2 for s in statuses)
3155 for inst, nnames in instdisk.items()
3156 for nname, statuses in nnames.items())
3157 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
3159 return instdisk
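# Illustrative sketch, not part of the original module: for a mirrored
# instance "inst1" (hypothetical) spanning node1/node2 with one disk,
# the mapping returned above looks like
#   {"inst1": {"node1": [(True, <block device status>)],
#              "node2": [(True, <block device status>)]}}
# while diskless instances map to an empty per-node dict.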
3161 @staticmethod
3162 def _SshNodeSelector(group_uuid, all_nodes):
3163 """Create endless iterators for all potential SSH check hosts.
3165 """
3166 nodes = [node for node in all_nodes
3167 if (node.group != group_uuid and
3168 not node.offline)]
3169 keyfunc = operator.attrgetter("group")
3171 return map(itertools.cycle,
3172 [sorted(map(operator.attrgetter("name"), names))
3173 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3174 keyfunc)])
3176 @classmethod
3177 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3178 """Choose which nodes should talk to which other nodes.
3180 We will make nodes contact all nodes in their group, and one node from
3181 every other group.
3183 @warning: This algorithm has a known issue if one node group is much
3184 smaller than others (e.g. just one node). In such a case all other
3185 nodes will talk to the single node.
3187 """
3188 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3189 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3191 return (online_nodes,
3192 dict((name, sorted([i.next() for i in sel]))
3193 for name in online_nodes))
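# Illustrative sketch, not part of the original module: with the checked
# group containing node1/node2 and one other group containing nodeA and
# nodeB (hypothetical names), the result above is roughly
#   (["node1", "node2"], {"node1": ["nodeA"], "node2": ["nodeB"]})
# i.e. every online node of the group, plus one round-robin pick from
# each other group for every such node.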
3195 def BuildHooksEnv(self):
3196 """Build hooks env.
3198 Cluster-Verify hooks just ran in the post phase and their failure makes
3199 the output be logged in the verify output and the verification to fail.
3201 """
3202 env = {
3203 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3204 }
3206 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3207 for node in self.my_node_info.values())
3209 return env
3211 def BuildHooksNodes(self):
3212 """Build hooks nodes.
3214 """
3215 return ([], self.my_node_names)
3217 def Exec(self, feedback_fn):
3218 """Verify integrity of the node group, performing various tests on nodes.
3220 """
3221 # This method has too many local variables. pylint: disable=R0914
3222 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3224 if not self.my_node_names:
3225 # empty node group
3226 feedback_fn("* Empty node group, skipping verification")
3227 return True
3229 self.bad = False
3230 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3231 verbose = self.op.verbose
3232 self._feedback_fn = feedback_fn
3234 vg_name = self.cfg.GetVGName()
3235 drbd_helper = self.cfg.GetDRBDHelper()
3236 cluster = self.cfg.GetClusterInfo()
3237 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3238 hypervisors = cluster.enabled_hypervisors
3239 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3241 i_non_redundant = [] # Non redundant instances
3242 i_non_a_balanced = [] # Non auto-balanced instances
3243 i_offline = 0 # Count of offline instances
3244 n_offline = 0 # Count of offline nodes
3245 n_drained = 0 # Count of nodes being drained
3246 node_vol_should = {}
3248 # FIXME: verify OS list
3250 # File verification
3251 filemap = _ComputeAncillaryFiles(cluster, False)
3253 # do local checksums
3254 master_node = self.master_node = self.cfg.GetMasterNode()
3255 master_ip = self.cfg.GetMasterIP()
3257 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3259 user_scripts = []
3260 if self.cfg.GetUseExternalMipScript():
3261 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3263 node_verify_param = {
3264 constants.NV_FILELIST:
3265 map(vcluster.MakeVirtualPath,
3266 utils.UniqueSequence(filename
3267 for files in filemap
3268 for filename in files)),
3269 constants.NV_NODELIST:
3270 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3271 self.all_node_info.values()),
3272 constants.NV_HYPERVISOR: hypervisors,
3273 constants.NV_HVPARAMS:
3274 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3275 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3276 for node in node_data_list
3277 if not node.offline],
3278 constants.NV_INSTANCELIST: hypervisors,
3279 constants.NV_VERSION: None,
3280 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3281 constants.NV_NODESETUP: None,
3282 constants.NV_TIME: None,
3283 constants.NV_MASTERIP: (master_node, master_ip),
3284 constants.NV_OSLIST: None,
3285 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3286 constants.NV_USERSCRIPTS: user_scripts,
3287 }
3289 if vg_name is not None:
3290 node_verify_param[constants.NV_VGLIST] = None
3291 node_verify_param[constants.NV_LVLIST] = vg_name
3292 node_verify_param[constants.NV_PVLIST] = [vg_name]
3294 if drbd_helper:
3295 node_verify_param[constants.NV_DRBDLIST] = None
3296 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3298 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3299 # Load file storage paths only from master node
3300 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3303 # FIXME: this needs to be changed per node-group, not cluster-wide
3304 bridges = set()
3305 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3306 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3307 bridges.add(default_nicpp[constants.NIC_LINK])
3308 for instance in self.my_inst_info.values():
3309 for nic in instance.nics:
3310 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3311 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3312 bridges.add(full_nic[constants.NIC_LINK])
3314 if bridges:
3315 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3317 # Build our expected cluster state
3318 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3319 name=node.name,
3320 vm_capable=node.vm_capable))
3321 for node in node_data_list)
3324 oob_paths = []
3325 for node in self.all_node_info.values():
3326 path = _SupportsOob(self.cfg, node)
3327 if path and path not in oob_paths:
3328 oob_paths.append(path)
3330 if oob_paths:
3331 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3333 for instance in self.my_inst_names:
3334 inst_config = self.my_inst_info[instance]
3335 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3336 i_offline += 1
3338 for nname in inst_config.all_nodes:
3339 if nname not in node_image:
3340 gnode = self.NodeImage(name=nname)
3341 gnode.ghost = (nname not in self.all_node_info)
3342 node_image[nname] = gnode
3344 inst_config.MapLVsByNode(node_vol_should)
3346 pnode = inst_config.primary_node
3347 node_image[pnode].pinst.append(instance)
3349 for snode in inst_config.secondary_nodes:
3350 nimg = node_image[snode]
3351 nimg.sinst.append(instance)
3352 if pnode not in nimg.sbp:
3353 nimg.sbp[pnode] = []
3354 nimg.sbp[pnode].append(instance)
3356 # At this point, we have the in-memory data structures complete,
3357 # except for the runtime information, which we'll gather next
3359 # Due to the way our RPC system works, exact response times cannot be
3360 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3361 # time before and after executing the request, we can at least have a time
3362 # window.
3363 nvinfo_starttime = time.time()
3364 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3365 node_verify_param,
3366 self.cfg.GetClusterName())
3367 nvinfo_endtime = time.time()
3369 if self.extra_lv_nodes and vg_name is not None:
3370 extra_lv_nvinfo = \
3371 self.rpc.call_node_verify(self.extra_lv_nodes,
3372 {constants.NV_LVLIST: vg_name},
3373 self.cfg.GetClusterName())
3374 else:
3375 extra_lv_nvinfo = {}
3377 all_drbd_map = self.cfg.ComputeDRBDMap()
3379 feedback_fn("* Gathering disk information (%s nodes)" %
3380 len(self.my_node_names))
3381 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3382 self.my_inst_info)
3384 feedback_fn("* Verifying configuration file consistency")
3386 # If not all nodes are being checked, we need to make sure the master node
3387 # and a non-checked vm_capable node are in the list.
3388 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3389 if absent_nodes:
3390 vf_nvinfo = all_nvinfo.copy()
3391 vf_node_info = list(self.my_node_info.values())
3392 additional_nodes = []
3393 if master_node not in self.my_node_info:
3394 additional_nodes.append(master_node)
3395 vf_node_info.append(self.all_node_info[master_node])
3396 # Add the first vm_capable node we find which is not included,
3397 # excluding the master node (which we already have)
3398 for node in absent_nodes:
3399 nodeinfo = self.all_node_info[node]
3400 if (nodeinfo.vm_capable and not nodeinfo.offline and
3401 node != master_node):
3402 additional_nodes.append(node)
3403 vf_node_info.append(self.all_node_info[node])
3404 break
3405 key = constants.NV_FILELIST
3406 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3407 {key: node_verify_param[key]},
3408 self.cfg.GetClusterName()))
3409 else:
3410 vf_nvinfo = all_nvinfo
3411 vf_node_info = self.my_node_info.values()
3413 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3415 feedback_fn("* Verifying node status")
3417 refos_img = None
3419 for node_i in node_data_list:
3420 node = node_i.name
3421 nimg = node_image[node]
3423 if node_i.offline:
3424 if verbose:
3425 feedback_fn("* Skipping offline node %s" % (node,))
3426 n_offline += 1
3427 continue
3429 if node == master_node:
3430 ntype = "master"
3431 elif node_i.master_candidate:
3432 ntype = "master candidate"
3433 elif node_i.drained:
3434 ntype = "drained"
3435 n_drained += 1
3436 else:
3437 ntype = "regular"
3438 if verbose:
3439 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3441 msg = all_nvinfo[node].fail_msg
3442 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3443 msg)
3444 if msg:
3445 nimg.rpc_fail = True
3446 continue
3448 nresult = all_nvinfo[node].payload
3450 nimg.call_ok = self._VerifyNode(node_i, nresult)
3451 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3452 self._VerifyNodeNetwork(node_i, nresult)
3453 self._VerifyNodeUserScripts(node_i, nresult)
3454 self._VerifyOob(node_i, nresult)
3455 self._VerifyFileStoragePaths(node_i, nresult,
3456 node == master_node)
3458 if nimg.vm_capable:
3459 self._VerifyNodeLVM(node_i, nresult, vg_name)
3460 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3461 all_drbd_map)
3463 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3464 self._UpdateNodeInstances(node_i, nresult, nimg)
3465 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3466 self._UpdateNodeOS(node_i, nresult, nimg)
3468 if not nimg.os_fail:
3469 if refos_img is None:
3470 refos_img = nimg
3471 self._VerifyNodeOS(node_i, nimg, refos_img)
3472 self._VerifyNodeBridges(node_i, nresult, bridges)
3474 # Check whether all running instances are primary for the node. (This
3475 # can no longer be done from _VerifyInstance below, since some of the
3476 # wrong instances could be from other node groups.)
3477 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3479 for inst in non_primary_inst:
3480 test = inst in self.all_inst_info
3481 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3482 "instance should not run on node %s", node_i.name)
3483 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3484 "node is running unknown instance %s", inst)
3486 for node, result in extra_lv_nvinfo.items():
3487 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3488 node_image[node], vg_name)
3490 feedback_fn("* Verifying instance status")
3491 for instance in self.my_inst_names:
3492 if verbose:
3493 feedback_fn("* Verifying instance %s" % instance)
3494 inst_config = self.my_inst_info[instance]
3495 self._VerifyInstance(instance, inst_config, node_image,
3496 instdisk[instance])
3497 inst_nodes_offline = []
3499 pnode = inst_config.primary_node
3500 pnode_img = node_image[pnode]
3501 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3502 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3503 " primary node failed", instance)
3505 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3506 pnode_img.offline,
3507 constants.CV_EINSTANCEBADNODE, instance,
3508 "instance is marked as running and lives on offline node %s",
3509 inst_config.primary_node)
3511 # If the instance is non-redundant we cannot survive losing its primary
3512 # node, so we are not N+1 compliant.
3513 if inst_config.disk_template not in constants.DTS_MIRRORED:
3514 i_non_redundant.append(instance)
3516 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3517 constants.CV_EINSTANCELAYOUT,
3518 instance, "instance has multiple secondary nodes: %s",
3519 utils.CommaJoin(inst_config.secondary_nodes),
3520 code=self.ETYPE_WARNING)
3522 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3523 pnode = inst_config.primary_node
3524 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3525 instance_groups = {}
3527 for node in instance_nodes:
3528 instance_groups.setdefault(self.all_node_info[node].group,
3529 []).append(node)
3531 pretty_list = [
3532 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3533 # Sort so that we always list the primary node first.
3534 for group, nodes in sorted(instance_groups.items(),
3535 key=lambda (_, nodes): pnode in nodes,
3536 reverse=True)]
3538 self._ErrorIf(len(instance_groups) > 1,
3539 constants.CV_EINSTANCESPLITGROUPS,
3540 instance, "instance has primary and secondary nodes in"
3541 " different groups: %s", utils.CommaJoin(pretty_list),
3542 code=self.ETYPE_WARNING)
3544 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3545 i_non_a_balanced.append(instance)
3547 for snode in inst_config.secondary_nodes:
3548 s_img = node_image[snode]
3549 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3550 snode, "instance %s, connection to secondary node failed",
3551 instance)
3553 if s_img.offline:
3554 inst_nodes_offline.append(snode)
3556 # warn that the instance lives on offline nodes
3557 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3558 "instance has offline secondary node(s) %s",
3559 utils.CommaJoin(inst_nodes_offline))
3560 # ... or ghost/non-vm_capable nodes
3561 for node in inst_config.all_nodes:
3562 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3563 instance, "instance lives on ghost node %s", node)
3564 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3565 instance, "instance lives on non-vm_capable node %s", node)
3567 feedback_fn("* Verifying orphan volumes")
3568 reserved = utils.FieldSet(*cluster.reserved_lvs)
3570 # We will get spurious "unknown volume" warnings if any node of this group
3571 # is secondary for an instance whose primary is in another group. To avoid
3572 # them, we find these instances and add their volumes to node_vol_should.
3573 for inst in self.all_inst_info.values():
3574 for secondary in inst.secondary_nodes:
3575 if (secondary in self.my_node_info
3576 and inst.name not in self.my_inst_info):
3577 inst.MapLVsByNode(node_vol_should)
3578 break
3580 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3582 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3583 feedback_fn("* Verifying N+1 Memory redundancy")
3584 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3586 feedback_fn("* Other Notes")
3587 if i_non_redundant:
3588 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3589 % len(i_non_redundant))
3591 if i_non_a_balanced:
3592 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3593 % len(i_non_a_balanced))
3595 if i_offline:
3596 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3598 if n_offline:
3599 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3601 if n_drained:
3602 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3604 return not self.bad
3606 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3607 """Analyze the post-hooks' result
3609 This method analyses the hook result, handles it, and sends some
3610 nicely-formatted feedback back to the user.
3612 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3613 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3614 @param hooks_results: the results of the multi-node hooks rpc call
3615 @param feedback_fn: function used to send feedback back to the caller
3616 @param lu_result: previous Exec result
3617 @return: the new Exec result, based on the previous result
3618 and hook results
3620 """
3621 # We only really run POST phase hooks, only for non-empty groups,
3622 # and are only interested in their results
3623 if not self.my_node_names:
3624 # empty node group
3625 pass
3626 elif phase == constants.HOOKS_PHASE_POST:
3627 # Used to change hooks' output to proper indentation
3628 feedback_fn("* Hooks Results")
3629 assert hooks_results, "invalid result from hooks"
3631 for node_name in hooks_results:
3632 res = hooks_results[node_name]
3633 msg = res.fail_msg
3634 test = msg and not res.offline
3635 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3636 "Communication failure in hooks execution: %s", msg)
3637 if res.offline or msg:
3638 # No need to investigate payload if node is offline or gave
3639 # an error.
3640 continue
3641 for script, hkr, output in res.payload:
3642 test = hkr == constants.HKR_FAIL
3643 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3644 "Script %s failed, output:", script)
3645 if test:
3646 output = self._HOOKS_INDENT_RE.sub(" ", output)
3647 feedback_fn("%s" % output)
3648 lu_result = False
3650 return lu_result
3653 class LUClusterVerifyDisks(NoHooksLU):
3654 """Verifies the cluster disks status.
3656 """
3657 REQ_BGL = False
3659 def ExpandNames(self):
3660 self.share_locks = _ShareAll()
3661 self.needed_locks = {
3662 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3663 }
3665 def Exec(self, feedback_fn):
3666 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3668 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3669 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3670 for group in group_names])
3673 class LUGroupVerifyDisks(NoHooksLU):
3674 """Verifies the status of all disks in a node group.
3676 """
3677 REQ_BGL = False
3679 def ExpandNames(self):
3680 # Raises errors.OpPrereqError on its own if group can't be found
3681 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3683 self.share_locks = _ShareAll()
3684 self.needed_locks = {
3685 locking.LEVEL_INSTANCE: [],
3686 locking.LEVEL_NODEGROUP: [],
3687 locking.LEVEL_NODE: [],
3689 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3690 # starts one instance of this opcode for every group, which means all
3691 # nodes will be locked for a short amount of time, so it's better to
3692 # acquire the node allocation lock as well.
3693 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3694 }
3696 def DeclareLocks(self, level):
3697 if level == locking.LEVEL_INSTANCE:
3698 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3700 # Lock instances optimistically, needs verification once node and group
3701 # locks have been acquired
3702 self.needed_locks[locking.LEVEL_INSTANCE] = \
3703 self.cfg.GetNodeGroupInstances(self.group_uuid)
3705 elif level == locking.LEVEL_NODEGROUP:
3706 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3708 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3709 set([self.group_uuid] +
3710 # Lock all groups used by instances optimistically; this requires
3711 # going via the node before it's locked, requiring verification
3712 # later on
3713 [group_uuid
3714 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3715 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3717 elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
3720 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3721 self._LockInstancesNodes()
3723 # Lock all nodes in group to be verified
3724 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3725 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3726 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3728 def CheckPrereq(self):
3729 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3730 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3731 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3733 assert self.group_uuid in owned_groups
3735 # Check if locked instances are still correct
3736 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3738 # Get instance information
3739 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3741 # Check if node groups for locked instances are still correct
3742 _CheckInstancesNodeGroups(self.cfg, self.instances,
3743 owned_groups, owned_nodes, self.group_uuid)
3745 def Exec(self, feedback_fn):
3746 """Verify integrity of cluster disks.
3748 @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}
3758 nv_dict = _MapInstanceDisksToNodes(
3759 [inst for inst in self.instances.values()
3760 if inst.admin_state == constants.ADMINST_UP])
3763 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3764 set(self.cfg.GetVmCapableNodeList()))
3766 node_lvs = self.rpc.call_lv_list(nodes, [])
3768 for (node, node_res) in node_lvs.items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue
3778 for lv_name, (_, _, lv_online) in node_res.payload.items():
3779 inst = nv_dict.pop((node, lv_name), None)
3780 if not (lv_online or inst is None):
3781 res_instances.add(inst)
    # any leftover items in nv_dict are missing LVs, let's arrange the data
    # better
3785 for key, inst in nv_dict.iteritems():
3786 res_missing.setdefault(inst, []).append(list(key))
3788 return (res_nodes, list(res_instances), res_missing)
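# Editor's sketch (not part of the original module): the LV bookkeeping above
# in isolation. nv_dict maps (node, lv_name) -> instance; every LV a node
# reports is popped, an offline LV marks its instance as needing
# "activate-disks", and whatever is never reported at all is missing. The
# report format here is simplified (lv_name -> is_online) for illustration.
def _ExampleCollectLvState(nv_dict, node_reports):
  res_instances = set()
  res_missing = {}
  for node, report in node_reports.items():
    for lv_name, lv_online in report.items():
      inst = nv_dict.pop((node, lv_name), None)
      if not (lv_online or inst is None):
        res_instances.add(inst)
  # leftovers were never reported by their node: record them as missing
  for (node, lv_name), inst in nv_dict.items():
    res_missing.setdefault(inst, []).append((node, lv_name))
  return (res_instances, res_missing)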
3791 class LUClusterRepairDiskSizes(NoHooksLU):
3792 """Verifies the cluster disks sizes.
3797 def ExpandNames(self):
3798 if self.op.instances:
3799 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3800 # Not getting the node allocation lock as only a specific set of
3801 # instances (and their nodes) is going to be acquired
3802 self.needed_locks = {
3803 locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
3809 self.needed_locks = {
3810 locking.LEVEL_NODE_RES: locking.ALL_SET,
3811 locking.LEVEL_INSTANCE: locking.ALL_SET,
        # This opcode acquires the node locks for all instances
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }

    self.share_locks = {
3818 locking.LEVEL_NODE_RES: 1,
3819 locking.LEVEL_INSTANCE: 0,
3820 locking.LEVEL_NODE_ALLOC: 1,
3823 def DeclareLocks(self, level):
3824 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3825 self._LockInstancesNodes(primary_only=True, level=level)
3827 def CheckPrereq(self):
3828 """Check prerequisites.
3830 This only checks the optional instance list against the existing names.
3833 if self.wanted_names is None:
3834 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3836 self.wanted_instances = \
3837 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3839 def _EnsureChildSizes(self, disk):
3840 """Ensure children of the disk have the needed disk size.
3842 This is valid mainly for DRBD8 and fixes an issue where the
3843 children have smaller disk size.
3845 @param disk: an L{ganeti.objects.Disk} object
3848 if disk.dev_type == constants.LD_DRBD8:
3849 assert disk.children, "Empty children for DRBD8?"
3850 fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3854 fchild.size, disk.size)
3855 fchild.size = disk.size
3857 # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
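# Editor's sketch (not part of the original module): _EnsureChildSizes on a
# toy stand-in for L{objects.Disk}. For a DRBD8 disk the first child is the
# data volume and must be at least as large as the parent; the second child
# (the metadata device) is deliberately left alone.
class _FakeDisk(object):
  def __init__(self, dev_type, size, children=None):
    self.dev_type = dev_type
    self.size = size
    self.children = children or []

# data = _FakeDisk("plain", 1000)
# meta = _FakeDisk("plain", 128)
# drbd = _FakeDisk(constants.LD_DRBD8, 1024, [data, meta])
# After self._EnsureChildSizes(drbd): data.size == 1024 and the call returned
# True, telling the caller the configuration must be written back.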
3862 def Exec(self, feedback_fn):
3863 """Verify the size of cluster disks.
3866 # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes

    per_node_disks = {}
    for instance in self.wanted_instances:
3870 pnode = instance.primary_node
3871 if pnode not in per_node_disks:
3872 per_node_disks[pnode] = []
3873 for idx, disk in enumerate(instance.disks):
3874 per_node_disks[pnode].append((instance, idx, disk))
3876 assert not (frozenset(per_node_disks.keys()) -
3877 self.owned_locks(locking.LEVEL_NODE_RES)), \
3878 "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
3891 if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue

      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
3899 self.LogWarning("Disk %d of instance %s did not return size"
3900 " information, ignoring", idx, instance.name)
          continue

        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
3907 if size != disk.size:
3908 self.LogInfo("Disk %d of instance %s has mismatched size,"
3909 " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
3913 changed.append((instance.name, idx, size))
3914 if self._EnsureChildSizes(disk):
3915 self.cfg.Update(instance, feedback_fn)
            changed.append((instance.name, idx, disk.size))

    return changed
3920 class LUClusterRename(LogicalUnit):
3921 """Rename the cluster.
3924 HPATH = "cluster-rename"
3925 HTYPE = constants.HTYPE_CLUSTER
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
3936 def BuildHooksNodes(self):
3937 """Build hooks nodes.
3940 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3942 def CheckPrereq(self):
3943 """Verify that the passed name is a valid one.
3946 hostname = netutils.GetHostname(name=self.op.name,
3947 family=self.cfg.GetPrimaryIPFamily())
3949 new_name = hostname.name
3950 self.ip = new_ip = hostname.ip
3951 old_name = self.cfg.GetClusterName()
3952 old_ip = self.cfg.GetMasterIP()
3953 if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
3957 if new_ip != old_ip:
3958 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3959 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3960 " reachable on the network" %
3961 new_ip, errors.ECODE_NOTUNIQUE)
3963 self.op.name = new_name
3965 def Exec(self, feedback_fn):
3966 """Rename the cluster.
    clustername = self.op.name
    new_ip = self.ip
3972 # shutdown the master IP
3973 master_params = self.cfg.GetMasterNetworkParameters()
3974 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
3981 cluster.cluster_name = clustername
3982 cluster.master_ip = new_ip
3983 self.cfg.Update(cluster, feedback_fn)
3985 # update the known hosts file
3986 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername
4005 def _ValidateNetmask(cfg, netmask):
4006 """Checks if a netmask is valid.
4008 @type cfg: L{config.ConfigWriter}
4009 @param cfg: The cluster configuration
4011 @param netmask: the netmask to be verified
4012 @raise errors.OpPrereqError: if the validation fails
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4018 except errors.ProgrammerError:
4019 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4020 ip_family, errors.ECODE_INVAL)
4021 if not ipcls.ValidateNetmask(netmask):
4022 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4023 (netmask), errors.ECODE_INVAL)
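# Editor's sketch (not part of the original module): the master netmask is a
# CIDR prefix length, so the check performed by ValidateNetmask above boils
# down to a range test against the address family (assumption: this mirrors
# netutils, it is not the actual implementation):
def _ExampleValidateNetmask(netmask, ipv6=False):
  max_len = 128 if ipv6 else 32
  return isinstance(netmask, (int, long)) and 0 < netmask <= max_len

# _ExampleValidateNetmask(24) -> True; _ExampleValidateNetmask(64) -> False
# for IPv4, but True for IPv6.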
4026 class LUClusterSetParams(LogicalUnit):
4027 """Change the parameters of the cluster.
4030 HPATH = "cluster-modify"
4031 HTYPE = constants.HTYPE_CLUSTER
4034 def CheckArguments(self):
4038 if self.op.uid_pool:
4039 uidpool.CheckUidPool(self.op.uid_pool)
4041 if self.op.add_uids:
4042 uidpool.CheckUidPool(self.op.add_uids)
4044 if self.op.remove_uids:
4045 uidpool.CheckUidPool(self.op.remove_uids)
4047 if self.op.master_netmask is not None:
4048 _ValidateNetmask(self.cfg, self.op.master_netmask)
4050 if self.op.diskparams:
4051 for dt_params in self.op.diskparams.values():
        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
      try:
        utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
      except errors.OpPrereqError, err:
        raise errors.OpPrereqError("While verifying diskparams options: %s" %
                                   err, errors.ECODE_INVAL)
4059 def ExpandNames(self):
4060 # FIXME: in the future maybe other cluster params won't require checking on
4061 # all nodes to be modified.
4062 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4063 # resource locks the right thing, shouldn't it be the BGL instead?
4064 self.needed_locks = {
4065 locking.LEVEL_NODE: locking.ALL_SET,
4066 locking.LEVEL_INSTANCE: locking.ALL_SET,
4067 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4068 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4070 self.share_locks = _ShareAll()
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
4081 def BuildHooksNodes(self):
4082 """Build hooks nodes.
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
4088 def CheckPrereq(self):
4089 """Check prerequisites.
4091 This checks whether the given params don't conflict and
4092 if the given volume group is valid.
4095 if self.op.vg_name is not None and not self.op.vg_name:
4096 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4097 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4098 " instances exist", errors.ECODE_INVAL)
4100 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4101 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4102 raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)
4106 node_list = self.owned_locks(locking.LEVEL_NODE)
    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
4111 for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
4123 (node, vgstatus), errors.ECODE_ENVIRON)
4125 if self.op.drbd_helper:
4126 # checks given drbd helper on all nodes
4127 helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
4135 " '%s': %s" % (node, msg),
4136 errors.ECODE_ENVIRON)
4137 node_helper = helpers[node].payload
4138 if node_helper != self.op.drbd_helper:
4139 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4140 (node, node_helper), errors.ECODE_ENVIRON)
4142 self.cluster = cluster = self.cfg.GetClusterInfo()
4143 # validate params changes
4144 if self.op.beparams:
4145 objects.UpgradeBeParams(self.op.beparams)
4146 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4147 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4149 if self.op.ndparams:
4150 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4151 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4153 # TODO: we need a more general way to handle resetting
4154 # cluster-level parameters to default values
4155 if self.new_ndparams["oob_program"] == "":
4156 self.new_ndparams["oob_program"] = \
4157 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4159 if self.op.hv_state:
4160 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4161 self.cluster.hv_state_static)
4162 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4163 for hv, values in new_hv_state.items())
4165 if self.op.disk_state:
4166 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4167 self.cluster.disk_state_static)
4168 self.new_disk_state = \
4169 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4170 for name, values in svalues.items()))
4171 for storage, svalues in new_disk_state.items())
    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
                                            group_policy=False)

      all_instances = self.cfg.GetAllInstancesInfo().values()
      violations = set()
4179 for group in self.cfg.GetAllNodeGroupsInfo().values():
4180 instances = frozenset([inst for inst in all_instances
4181 if compat.any(node in group.members
4182 for node in inst.all_nodes)])
4183 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4184 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4185 new = _ComputeNewInstanceViolations(ipol,
                                            new_ipolicy, instances)
        if new:
          violations.update(new)

      if violations:
        self.LogWarning("After the ipolicy change the following instances"
4192 " violate them: %s",
4193 utils.CommaJoin(utils.NiceSort(violations)))
4195 if self.op.nicparams:
4196 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4197 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []
4201 # check all instances for consistency
4202 for instance in self.cfg.GetAllInstancesInfo().values():
4203 for nic_idx, nic in enumerate(instance.nics):
4204 params_copy = copy.deepcopy(nic.nicparams)
4205 params_filled = objects.FillDict(self.new_nicparams, params_copy)
        # check parameter syntax
        try:
          objects.NIC.CheckParameterSyntax(params_filled)
4210 except errors.ConfigurationError, err:
4211 nic_errors.append("Instance %s, nic/%d: %s" %
4212 (instance.name, nic_idx, err))
4214 # if we're moving instances to routed, check that they have an ip
4215 target_mode = params_filled[constants.NIC_MODE]
4216 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4217 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                            " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4221 "\n".join(nic_errors), errors.ECODE_INVAL)
4223 # hypervisor list/parameters
4224 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4225 if self.op.hvparams:
4226 for hv_name, hv_dict in self.op.hvparams.items():
4227 if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)
4232 # disk template parameters
4233 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4234 if self.op.diskparams:
4235 for dt_name, dt_params in self.op.diskparams.items():
        if dt_name not in self.new_diskparams:
          self.new_diskparams[dt_name] = dt_params
        else:
          self.new_diskparams[dt_name].update(dt_params)
4241 # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
4245 if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
4249 if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
4255 self.new_osp = objects.FillDict(cluster.osparams, {})
4256 if self.op.osparams:
4257 for os_name, osp in self.op.osparams.items():
4258 if os_name not in self.new_osp:
4259 self.new_osp[os_name] = {}
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)
4264 if not self.new_osp[os_name]:
4265 # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
4269 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4270 os_name, self.new_osp[os_name])
4272 # changes to the hypervisor list
4273 if self.op.enabled_hypervisors is not None:
4274 self.hv_list = self.op.enabled_hypervisors
4275 for hv in self.hv_list:
4276 # if the hypervisor doesn't already exist in the cluster
4277 # hvparams, we initialize it to empty, and then (in both
4278 # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors
4288 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4289 # either the enabled list has changed, or the parameters have, validate
4290 for hv_name, hv_params in self.new_hvparams.items():
4291 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4292 (self.op.enabled_hypervisors and
4293 hv_name in self.op.enabled_hypervisors)):
4294 # either this is a new hypervisor, or its parameters have changed
4295 hv_class = hypervisor.GetHypervisor(hv_name)
4296 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4297 hv_class.CheckParameterSyntax(hv_params)
        _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
4302 # defaults have already been checked in the above code-block
4303 for os_name, os_hvp in self.new_os_hvp.items():
4304 for hv_name, hv_params in os_hvp.items():
4305 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4306 # we need to fill in the new os_hvp on top of the actual hv_p
4307 cluster_defaults = self.new_hvparams.get(hv_name, {})
4308 new_osp = objects.FillDict(cluster_defaults, hv_params)
4309 hv_class = hypervisor.GetHypervisor(hv_name)
4310 hv_class.CheckParameterSyntax(new_osp)
4311 _CheckHVParams(self, node_list, hv_name, new_osp)
4313 if self.op.default_iallocator:
4314 alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
4318 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)
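# Editor's sketch (not part of the original module): nearly every parameter
# family above (be/nd/nic/hv/os/disk) is validated with the same layering
# pattern: copy the cluster defaults, overlay the submitted values, then
# type-check the merged result. Roughly what the objects.FillDict calls do
# (an assumption based on their use here; FillDict's skip_keys argument is
# ignored in this sketch):
def _ExampleFillDict(defaults, custom):
  merged = defaults.copy()
  merged.update(custom)
  return merged

# _ExampleFillDict({"kernel_path": "/boot/vmlinuz", "serial_console": True},
#                  {"serial_console": False})
# -> {"kernel_path": "/boot/vmlinuz", "serial_console": False}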
4322 def Exec(self, feedback_fn):
4323 """Change the parameters of the cluster.
4326 if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
4333 feedback_fn("Cluster LVM configuration already in desired"
4334 " state, not changing")
4335 if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
4344 if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
4348 if self.op.enabled_hypervisors is not None:
4349 self.cluster.hvparams = self.new_hvparams
4350 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4351 if self.op.beparams:
4352 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4353 if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
4357 if self.op.osparams:
4358 self.cluster.osparams = self.new_osp
4359 if self.op.ndparams:
4360 self.cluster.ndparams = self.new_ndparams
4361 if self.op.diskparams:
4362 self.cluster.diskparams = self.new_diskparams
4363 if self.op.hv_state:
4364 self.cluster.hv_state_static = self.new_hv_state
4365 if self.op.disk_state:
4366 self.cluster.disk_state_static = self.new_disk_state
4368 if self.op.candidate_pool_size is not None:
4369 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4370 # we need to update the pool size here, otherwise the save will fail
4371 _AdjustCandidatePool(self, [])
4373 if self.op.maintain_node_health is not None:
4374 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4375 feedback_fn("Note: CONFD was disabled at build time, node health"
4376 " maintenance is not useful (still enabling it)")
4377 self.cluster.maintain_node_health = self.op.maintain_node_health
4379 if self.op.prealloc_wipe_disks is not None:
4380 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4382 if self.op.add_uids is not None:
4383 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4385 if self.op.remove_uids is not None:
4386 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4388 if self.op.uid_pool is not None:
4389 self.cluster.uid_pool = self.op.uid_pool
4391 if self.op.default_iallocator is not None:
4392 self.cluster.default_iallocator = self.op.default_iallocator
4394 if self.op.reserved_lvs is not None:
4395 self.cluster.reserved_lvs = self.op.reserved_lvs
4397 if self.op.use_external_mip_script is not None:
4398 self.cluster.use_external_mip_script = self.op.use_external_mip_script
    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
4417 if self.op.hidden_os:
4418 helper_os("hidden_os", self.op.hidden_os, "hidden")
4420 if self.op.blacklisted_os:
4421 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4423 if self.op.master_netdev:
4424 master_params = self.cfg.GetMasterNetworkParameters()
4425 ems = self.cfg.GetUseExternalMipScript()
4426 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4427 self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
4431 feedback_fn("Changing master_netdev from %s to %s" %
4432 (master_params.netdev, self.op.master_netdev))
4433 self.cluster.master_netdev = self.op.master_netdev
4435 if self.op.master_netmask:
4436 master_params = self.cfg.GetMasterNetworkParameters()
4437 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4438 result = self.rpc.call_node_change_master_netmask(master_params.name,
4439 master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)
4447 self.cluster.master_netmask = self.op.master_netmask
4449 self.cfg.Update(self.cluster, feedback_fn)
4451 if self.op.master_netdev:
4452 master_params = self.cfg.GetMasterNetworkParameters()
4453 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4454 self.op.master_netdev)
4455 ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)
4464 def _UploadHelper(lu, nodes, fname):
4465 """Helper for uploading a file and showing warnings.
4468 if os.path.exists(fname):
4469 result = lu.rpc.call_upload_file(nodes, fname)
4470 for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.LogWarning(msg)
4478 def _ComputeAncillaryFiles(cluster, redist):
4479 """Compute files external to Ganeti which need to be consistent.
4481 @type redist: boolean
4482 @param redist: Whether to include files which need to be redistributed
  # Compute files for all nodes
  files_all = set([
    pathutils.SSH_KNOWN_HOSTS_FILE,
4488 pathutils.CONFD_HMAC_KEY,
4489 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4490 pathutils.SPICE_CERT_FILE,
4491 pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    ])

  if redist:
    # we need to ship at least the RAPI certificate
    files_all.add(pathutils.RAPI_CERT_FILE)
  else:
    files_all.update(pathutils.ALL_CERT_FILES)
4500 files_all.update(ssconf.SimpleStore().GetFileList())
4502 if cluster.modify_etc_hosts:
4503 files_all.add(pathutils.ETC_HOSTS)
4505 if cluster.use_external_mip_script:
4506 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4508 # Files which are optional, these must:
4509 # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    pathutils.RAPI_USERS_FILE,
    ])
  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(pathutils.CLUSTER_CONF_FILE)
  # File storage
  if (not redist and
      (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4524 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4525 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
  # Files which should only be on VM-capable nodes
  files_vm = set(
    filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(
    filename
    for hv_name in cluster.enabled_hypervisors
4536 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4538 # Filenames in each category must be unique
4539 all_files_set = files_all | files_mc | files_vm
4540 assert (len(all_files_set) ==
4541 sum(map(len, [files_all, files_mc, files_vm]))), \
4542 "Found file listed in more than one file list"
4544 # Optional files must be present in one other category
4545 assert all_files_set.issuperset(files_opt), \
4546 "Optional file not in a different required list"
4548 # This one file should never ever be re-distributed via RPC
4549 assert not (redist and
4550 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4552 return (files_all, files_opt, files_mc, files_vm)
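# Editor's sketch (not part of the original module): how a consumer can use
# the four sets returned above, e.g. to compute which files a given node is
# expected to hold. The helper name and role flags are illustrative only.
def _ExampleFilesForNode(cluster, is_master_candidate, is_vm_capable):
  (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, False)
  expected = set(files_all)
  if is_master_candidate:
    expected |= files_mc
  if is_vm_capable:
    expected |= files_vm
  return expected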
4555 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4556 """Distribute additional files which are part of the cluster configuration.
4558 ConfigWriter takes care of distributing the config and ssconf files, but
4559 there are more files which should be distributed to all nodes. This function
4560 makes sure those are copied.
4562 @param lu: calling logical unit
4563 @param additional_nodes: list of nodes not in the config to distribute to
4564 @type additional_vm: boolean
4565 @param additional_vm: whether the additional nodes are vm-capable or not
4568 # Gather target nodes
4569 cluster = lu.cfg.GetClusterInfo()
4570 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4572 online_nodes = lu.cfg.GetOnlineNodeList()
4573 online_set = frozenset(online_nodes)
4574 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4576 if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
4581 # Never distribute to master node
4582 for nodelist in [online_nodes, vm_nodes]:
4583 if master_info.name in nodelist:
4584 nodelist.remove(master_info.name)
4587 (files_all, _, files_mc, files_vm) = \
4588 _ComputeAncillaryFiles(cluster, True)
4590 # Never re-distribute configuration file from here
4591 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4592 pathutils.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
4606 class LUClusterRedistConf(NoHooksLU):
4607 """Force the redistribution of cluster configuration.
4609 This is a very simple LU.
4614 def ExpandNames(self):
4615 self.needed_locks = {
4616 locking.LEVEL_NODE: locking.ALL_SET,
4617 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4619 self.share_locks = _ShareAll()
4621 def Exec(self, feedback_fn):
4622 """Redistribute the configuration.
4625 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4626 _RedistributeAncillaryFiles(self)
4629 class LUClusterActivateMasterIp(NoHooksLU):
4630 """Activate the master IP on the master node.
4633 def Exec(self, feedback_fn):
4634 """Activate the master IP.
4637 master_params = self.cfg.GetMasterNetworkParameters()
4638 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")
4644 class LUClusterDeactivateMasterIp(NoHooksLU):
4645 """Deactivate the master IP on the master node.
4648 def Exec(self, feedback_fn):
4649 """Deactivate the master IP.
4652 master_params = self.cfg.GetMasterNetworkParameters()
4653 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")
4659 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4660 """Sleep and poll for an instance's disk to sync.
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.LogInfo("Waiting for instance %s to sync disks", instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)
4676 # TODO: Convert to utils.Retry
  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4685 msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    retries = 0
    rstats = rstats.payload
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue
4702 cumul_degraded = (cumul_degraded or
4703 (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
4707 rem_time = ("%s remaining (estimated)" %
4708 utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
4712 lu.LogInfo("- device %s: %5.2f%% done, %s",
4713 disks[i].iv_name, mstat.sync_percent, rem_time)
4715 # if we're done but degraded, let's do a few small retries, to
4716 # make sure we see a stable and not transient situation; therefore
4717 # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))
  if not oneshot:
    lu.LogInfo("Instance %s's disks are in sync", instance.name)
4732 return not cumul_degraded
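# Editor's sketch (not part of the original module): the retry skeleton of
# _WaitForSync in isolation. A mirror that reports "done but degraded" gets a
# few one-second confirmation polls before either outcome is believed;
# poll_fn is a made-up stand-in for the blockdev_getmirrorstatus RPC above.
def _ExamplePollUntilSynced(poll_fn, confirm_retries=10):
  while True:
    (done, degraded, est_time) = poll_fn()
    if done and degraded and confirm_retries > 0:
      confirm_retries -= 1
      time.sleep(1)
      continue
    if done:
      return not degraded
    time.sleep(min(60, est_time))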
4735 def _BlockdevFind(lu, node, dev, instance):
4736 """Wrapper around call_blockdev_find to annotate diskparams.
4738 @param lu: A reference to the lu object
4739 @param node: The node to call out
4740 @param dev: The device to find
4741 @param instance: The instance object the device belongs to
4742 @returns The result of the rpc call
4745 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4746 return lu.rpc.call_blockdev_find(node, disk)
4749 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4750 """Wrapper around L{_CheckDiskConsistencyInner}.
4753 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
                                    ldisk=ldisk)
def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
                               ldisk=False):
4760 """Check that mirrors are not degraded.
4762 @attention: The device has to be annotated already.
4764 The ldisk parameter, if True, will change the test from the
4765 is_degraded attribute (which represents overall non-ok status for
4766 the device(s)) to the ldisk (representing the local storage status).
  lu.cfg.SetDiskID(dev, node)
  result = True
  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
                                                     on_primary)
  return result
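# Editor's sketch (not part of the original module): the two predicates used
# above side by side. With ldisk=True the caller asks "is the local storage
# on this node OK", otherwise "is the mirror as a whole non-degraded"; the
# payload argument stands for the object returned by blockdev_find.
def _ExampleIsConsistent(payload, ldisk):
  if ldisk:
    return payload.ldisk_status == constants.LDS_OKAY
  return not payload.is_degraded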
4796 class LUOobCommand(NoHooksLU):
4797 """Logical unit for OOB handling.
4801 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4803 def ExpandNames(self):
4804 """Gather locks we need.
4807 if self.op.node_names:
4808 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET
4813 self.needed_locks = {
4814 locking.LEVEL_NODE: lock_names,
4817 def CheckPrereq(self):
4818 """Check prerequisites.
4821 - the node exists in the configuration
4824 Any errors are signaled by raising errors.OpPrereqError.
    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()
4830 assert self.op.power_delay >= 0.0
4832 if self.op.node_names:
4833 if (self.op.command in self._SKIP_MASTER and
4834 self.master_node in self.op.node_names):
4835 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4836 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4838 if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"
4846 raise errors.OpPrereqError(("Operating on the master node %s is not"
4847 " allowed for %s; %s") %
4848 (self.master_node, self.op.command,
4849 additional_text), errors.ECODE_INVAL)
4851 self.op.node_names = self.cfg.GetNodeList()
4852 if self.op.command in self._SKIP_MASTER:
4853 self.op.node_names.remove(self.master_node)
4855 if self.op.command in self._SKIP_MASTER:
4856 assert self.master_node not in self.op.node_names
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      self.nodes.append(node)
4865 if (not self.op.ignore_status and
4866 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4867 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4868 " not marked offline") % node_name,
4871 def Exec(self, feedback_fn):
4872 """Execute OOB and return result if we expect any.
    master_node = self.master_node
    ret = []
4878 for idx, node in enumerate(utils.NiceSort(self.nodes,
4879 key=lambda node: node.name)):
4880 node_entry = [(constants.RS_NORMAL, node.name)]
4881 ret.append(node_entry)
      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue
4889 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4890 self.op.command, oob_program, node.name)
4891 result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4897 node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
4902 except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
4908 # For health we should log important events
4909 for item, status in result.payload:
4910 if status in [constants.OOB_STATUS_WARNING,
4911 constants.OOB_STATUS_CRITICAL]:
4912 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4913 item, node.name, status)
          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
4919 elif self.op.command == constants.OOB_POWER_STATUS:
4920 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4921 if powered != node.powered:
4922 logging.warning(("Recorded power state (%s) of node '%s' does not"
                             " match actual power state (%s)"), node.powered,
                            node.name, powered)
4927 if self.op.command in (constants.OOB_POWER_ON,
4928 constants.OOB_POWER_OFF):
4929 self.cfg.Update(node, feedback_fn)
4931 node_entry.append((constants.RS_NORMAL, result.payload))
4933 if (self.op.command == constants.OOB_POWER_ON and
4934 idx < len(self.nodes) - 1):
        time.sleep(self.op.power_delay)

    return ret
4939 def _CheckPayload(self, result):
4940 """Checks if the payload is valid.
4942 @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
4948 if not isinstance(result.payload, list):
4949 errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
4953 if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))
4958 if not isinstance(result.payload, dict):
4959 errs.append("power-status is expected to return a dict but got %s" %
4960 type(result.payload))
4962 if self.op.command in [
4963 constants.OOB_POWER_ON,
4964 constants.OOB_POWER_OFF,
4965 constants.OOB_POWER_CYCLE,
4967 if result.payload is not None:
4968 errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
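# Editor's sketch (not part of the original module): the payload shapes that
# _CheckPayload accepts, with made-up data (status strings are illustrative):
def _ExampleValidOobPayloads():
  return {
    constants.OOB_HEALTH: [("disk0", "OK"), ("psu1", "CRITICAL")],
    constants.OOB_POWER_STATUS: {constants.OOB_POWER_STATUS_POWERED: True},
    constants.OOB_POWER_ON: None,
    constants.OOB_POWER_OFF: None,
    constants.OOB_POWER_CYCLE: None,
    }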
4976 class _OsQuery(_QueryBase):
4977 FIELDS = query.OS_FIELDS
4979 def ExpandNames(self, lu):
4980 # Lock all nodes in shared mode
4981 # Temporary removal of locks, should be reverted later
4982 # TODO: reintroduce locks when they are lighter-weight
4983 lu.needed_locks = {}
4984 #self.share_locks[locking.LEVEL_NODE] = 1
4985 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET
4993 self.do_locking = self.use_locking
  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
4999 def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary
5002 @param rlist: a map with node names as keys and OS objects as values
5005 @return: a dictionary with osnames as keys and as value another
5006 map, with nodes as keys and tuples of (path, status, diagnose,
5007 variants, parameters, api_versions) as values, eg::
5009 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5010 (/srv/..., False, "invalid api")],
                     "node2": [(/srv/..., True, "", [], [])]}

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
5017 # level), so that nodes with a non-responding node daemon don't
5018 # make all OSes invalid
5019 good_nodes = [node_name for node_name in rlist
5020 if not rlist[node_name].fail_msg]
5021 for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
5025 params, api_versions) in nr.payload:
5026 if name not in all_os:
5027 # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
5031 all_os[name][nname] = []
5032 # convert params from [name, help] to (name, help)
5033 params = [tuple(v) for v in params]
5034 all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))

    return all_os
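# Editor's sketch (not part of the original module): why good_nodes matters.
# Pre-seeding an empty list for every node that answered the RPC makes "OS
# missing on node X" (empty list, OS invalid) distinguishable from "node X
# never answered" (node absent, OS unaffected). A validity check mirroring
# _GetQueryData below:
def _ExampleOsValidity(all_os):
  return dict((name, all(osl and osl[0][1] for osl in per_node.values()))
              for name, per_node in all_os.items())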
5038 def _GetQueryData(self, lu):
5039 """Computes the list of nodes and their attributes.
5042 # Locking is not used
5043 assert not (compat.any(lu.glm.is_owned(level)
5044 for level in locking.LEVELS
5045 if level != locking.LEVEL_CLUSTER) or
5046 self.do_locking or self.use_locking)
5048 valid_nodes = [node.name
5049 for node in lu.cfg.GetAllNodesInfo().values()
5050 if not node.offline and node.vm_capable]
5051 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
5057 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5058 hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()
5065 for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
5078 variants.intersection_update(node_variants)
5079 parameters.intersection_update(node_params)
5080 api_versions.intersection_update(node_api)
5082 info.variants = list(variants)
5083 info.parameters = list(parameters)
5084 info.api_versions = list(api_versions)
5086 data[os_name] = info
5088 # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
5093 class LUOsDiagnose(NoHooksLU):
5094 """Logical unit for OS diagnose/query.
  @staticmethod
  def _BuildFilter(fields, names):
5101 """Builds a filter for querying OSes.
5104 name_filter = qlang.MakeSimpleFilter("name", names)
5106 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5107 # respective field is not requested
5108 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5109 for fname in ["hidden", "blacklisted"]
5110 if fname not in fields]
5111 if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter
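# Editor's note (not part of the original module): for a plain listing where
# only "name" is requested and no name arguments are given, the filter built
# above roughly comes out as
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
# i.e. hidden, blacklisted and invalid OSes are shown exactly when the caller
# asks for the corresponding field.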
5126 def CheckArguments(self):
5127 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5128 self.op.output_fields, False)
5130 def ExpandNames(self):
5131 self.oq.ExpandNames(self)
5133 def Exec(self, feedback_fn):
5134 return self.oq.OldStyleQuery(self)
5137 class LUNodeRemove(LogicalUnit):
5138 """Logical unit for removing a node.
5141 HPATH = "node-remove"
5142 HTYPE = constants.HTYPE_NODE
5144 def BuildHooksEnv(self):
5149 "OP_TARGET": self.op.node_name,
5150 "NODE_NAME": self.op.node_name,
5153 def BuildHooksNodes(self):
5154 """Build hooks nodes.
5156 This doesn't run on the target node in the pre phase as a failed
5157 node would then be impossible to remove.
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      pass
    return (all_nodes, all_nodes)
5167 def CheckPrereq(self):
5168 """Check prerequisites.
5171 - the node exists in the configuration
5172 - it does not have primary or secondary instances
5173 - it's not the master
5175 Any errors are signaled by raising errors.OpPrereqError.
5178 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5179 node = self.cfg.GetNodeInfo(self.op.node_name)
5180 assert node is not None
5182 masternode = self.cfg.GetMasterNode()
5183 if node.name == masternode:
5184 raise errors.OpPrereqError("Node is the master node, failover to another"
5185 " node is required", errors.ECODE_INVAL)
5187 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5188 if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node
5195 def Exec(self, feedback_fn):
5196 """Removes the node from the cluster.
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)
5203 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5205 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5208 # Promote nodes to master candidate as needed
5209 _AdjustCandidatePool(self, exceptions=[node.name])
5210 self.context.RemoveNode(node.name)
5212 # Run post hooks on the node before it's removed
5213 _RunPostHook(self, node.name)
5215 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5216 msg = result.fail_msg
5218 self.LogWarning("Errors encountered on the remote node while leaving"
5219 " the cluster: %s", msg)
5221 # Remove node from our /etc/hosts
5222 if self.cfg.GetClusterInfo().modify_etc_hosts:
5223 master_node = self.cfg.GetMasterNode()
5224 result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
5227 result.Raise("Can't update hosts file with new host data")
5228 _RedistributeAncillaryFiles(self)
5231 class _NodeQuery(_QueryBase):
5232 FIELDS = query.NODE_FIELDS
5234 def ExpandNames(self, lu):
5235 lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET
5243 self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
5248 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5249 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
  def DeclareLocks(self, lu, level):
    pass
5254 def _GetQueryData(self, lu):
5255 """Computes the list of nodes and their attributes.
5258 all_info = lu.cfg.GetAllNodesInfo()
5260 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5262 # Gather data as requested
5263 if query.NQ_LIVE in self.requested_data:
5264 # filter out non-vm_capable nodes
5265 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5267 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5268 [lu.cfg.GetHypervisorType()])
5269 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5270 for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
5276 node_to_primary = dict([(name, set()) for name in nodenames])
5277 node_to_secondary = dict([(name, set()) for name in nodenames])
5279 inst_data = lu.cfg.GetAllInstancesInfo()
5281 for inst in inst_data.values():
5282 if inst.primary_node in node_to_primary:
5283 node_to_primary[inst.primary_node].add(inst.name)
5284 for secnode in inst.secondary_nodes:
5285 if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
5289 node_to_secondary = None
5291 if query.NQ_OOB in self.requested_data:
5292 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}
5302 return query.NodeQueryData([all_info[name] for name in nodenames],
5303 live_data, lu.cfg.GetMasterNode(),
5304 node_to_primary, node_to_secondary, groups,
5305 oob_support, lu.cfg.GetClusterInfo())
5308 class LUNodeQuery(NoHooksLU):
5309 """Logical unit for querying nodes.
5312 # pylint: disable=W0142
5315 def CheckArguments(self):
5316 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5317 self.op.output_fields, self.op.use_locking)
5319 def ExpandNames(self):
5320 self.nq.ExpandNames(self)
5322 def DeclareLocks(self, level):
5323 self.nq.DeclareLocks(self, level)
5325 def Exec(self, feedback_fn):
5326 return self.nq.OldStyleQuery(self)
5329 class LUNodeQueryvols(NoHooksLU):
5330 """Logical unit for getting volumes on node(s).
5334 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5335 _FIELDS_STATIC = utils.FieldSet("node")
5337 def CheckArguments(self):
5338 _CheckOutputFields(static=self._FIELDS_STATIC,
5339 dynamic=self._FIELDS_DYNAMIC,
5340 selected=self.op.output_fields)
5342 def ExpandNames(self):
5343 self.share_locks = _ShareAll()
5344 self.needed_locks = {}
5346 if not self.op.nodes:
5347 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5349 self.needed_locks[locking.LEVEL_NODE] = \
5350 _GetWantedNodes(self, self.op.nodes)
5352 def Exec(self, feedback_fn):
5353 """Computes the list of nodes and their attributes.
5356 nodenames = self.owned_locks(locking.LEVEL_NODE)
5357 volumes = self.rpc.call_node_volumes(nodenames)
5359 ilist = self.cfg.GetAllInstancesInfo()
5360 vol2inst = _MapInstanceDisksToNodes(ilist.values())
    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
5373 key=operator.itemgetter("dev"))
      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))
        output.append(node_output)

    return output
5399 class LUNodeQueryStorage(NoHooksLU):
5400 """Logical unit for getting information on storage units on node(s).
5403 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5406 def CheckArguments(self):
5407 _CheckOutputFields(static=self._FIELDS_STATIC,
5408 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5409 selected=self.op.output_fields)
5411 def ExpandNames(self):
    self.share_locks = _ShareAll()

    if self.op.nodes:
      self.needed_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }
    else:
      self.needed_locks = {
5420 locking.LEVEL_NODE: locking.ALL_SET,
5421 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5424 def Exec(self, feedback_fn):
5425 """Computes the list of nodes and their attributes.
5428 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5430 # Always get name to sort by
5431 if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields
5436 # Never ask for node or type as it's only known to the LU
5437 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5438 while extra in fields:
5439 fields.remove(extra)
5441 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5442 name_idx = field_idx[constants.SF_NAME]
5444 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5445 data = self.rpc.call_storage_list(self.nodes,
5446 self.op.storage_type, st_args,
                                       self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue
5461 rows = dict([(row[name_idx], row) for row in nresult.payload])
      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []
5468 for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
5471 elif field == constants.SF_TYPE:
5472 val = self.op.storage_type
5473 elif field in field_idx:
5474 val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
5485 class _InstanceQuery(_QueryBase):
5486 FIELDS = query.INSTANCE_FIELDS
5488 def ExpandNames(self, lu):
5489 lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5501 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5502 lu.needed_locks[locking.LEVEL_NODE] = []
5503 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5505 self.do_grouplocks = (self.do_locking and
5506 query.IQ_NODES in self.requested_data)
5508 def DeclareLocks(self, lu, level):
5510 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5511 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5513 # Lock all groups used by instances optimistically; this requires going
5514 # via the node before it's locked, requiring verification later on
      lu.needed_locks[locking.LEVEL_NODEGROUP] = \
        set(group_uuid
            for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5518 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5519 elif level == locking.LEVEL_NODE:
5520 lu._LockInstancesNodes() # pylint: disable=W0212
  @staticmethod
  def _CheckGroupLocks(lu):
5524 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5525 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5527 # Check if node groups for locked instances are still correct
5528 for instance_name in owned_instances:
5529 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5531 def _GetQueryData(self, lu):
5532 """Computes the list of instances and their attributes.
5535 if self.do_grouplocks:
5536 self._CheckGroupLocks(lu)
5538 cluster = lu.cfg.GetClusterInfo()
5539 all_info = lu.cfg.GetAllInstancesInfo()
5541 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5543 instance_list = [all_info[name] for name in instance_names]
5544 nodes = frozenset(itertools.chain(*(inst.all_nodes
5545 for inst in instance_list)))
5546 hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()
5551 # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
5560 offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
5563 elif result.payload:
5564 for inst in result.payload:
5565 if inst in all_info:
5566 if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
5572 # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
      # else no instance is alive
    else:
      live_data = {}
5579 if query.IQ_DISKUSAGE in self.requested_data:
5580 gmi = ganeti.masterd.instance
5581 disk_usage = dict((inst.name,
5582 gmi.ComputeDiskSize(inst.disk_template,
5583 [{constants.IDISK_SIZE: disk.size}
5584 for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
5591 for inst in instance_list:
5592 if inst.name in live_data:
5593 # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None
5601 if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None
5612 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5613 disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
5618 class LUQuery(NoHooksLU):
5619 """Query for resources/items of a certain kind.
5622 # pylint: disable=W0142
5625 def CheckArguments(self):
5626 qcls = _GetQueryImplementation(self.op.what)
5628 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5630 def ExpandNames(self):
5631 self.impl.ExpandNames(self)
5633 def DeclareLocks(self, level):
5634 self.impl.DeclareLocks(self, level)
5636 def Exec(self, feedback_fn):
5637 return self.impl.NewStyleQuery(self)
5640 class LUQueryFields(NoHooksLU):
5641 """Query for resources/items of a certain kind.
5644 # pylint: disable=W0142
5647 def CheckArguments(self):
5648 self.qcls = _GetQueryImplementation(self.op.what)
5650 def ExpandNames(self):
5651 self.needed_locks = {}
5653 def Exec(self, feedback_fn):
5654 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5657 class LUNodeModifyStorage(NoHooksLU):
5658 """Logical unit for modifying a storage volume on a node.
5663 def CheckArguments(self):
5664 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)
    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)
5682 def ExpandNames(self):
5683 self.needed_locks = {
5684       locking.LEVEL_NODE: self.op.node_name,
5685       }
5687 def Exec(self, feedback_fn):
5688 """Computes the list of nodes and their attributes.
5691 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5692 result = self.rpc.call_storage_modify(self.op.node_name,
5693 self.op.storage_type, st_args,
5694 self.op.name, self.op.changes)
5695 result.Raise("Failed to modify storage unit '%s' on %s" %
5696 (self.op.name, self.op.node_name))
5699 class LUNodeAdd(LogicalUnit):
5700 """Logical unit for adding node to the cluster.
5704 HTYPE = constants.HTYPE_NODE
5705 _NFLAGS = ["master_capable", "vm_capable"]
5707 def CheckArguments(self):
5708 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5709 # validate/normalize the node name
5710 self.hostname = netutils.GetHostname(name=self.op.node_name,
5711 family=self.primary_ip_family)
5712 self.op.node_name = self.hostname.name
5714 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5715       raise errors.OpPrereqError("Cannot readd the master node",
5716                                  errors.ECODE_INVAL)
5718 if self.op.readd and self.op.group:
5719 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5720 " being readded", errors.ECODE_INVAL)
5722   def BuildHooksEnv(self):
5723     """Build hooks env.
5725     This will run on all nodes before, and on all nodes + the new node after.
5727     """
5728     return {
5729       "OP_TARGET": self.op.node_name,
5730       "NODE_NAME": self.op.node_name,
5731       "NODE_PIP": self.op.primary_ip,
5732       "NODE_SIP": self.op.secondary_ip,
5733       "MASTER_CAPABLE": str(self.op.master_capable),
5734       "VM_CAPABLE": str(self.op.vm_capable),
5735       }
5737 def BuildHooksNodes(self):
5738 """Build hooks nodes.
5741 # Exclude added node
5742 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5743 post_nodes = pre_nodes + [self.op.node_name, ]
5745 return (pre_nodes, post_nodes)
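  # Illustrative note (hypothetical values, not part of the LU): for an
  # existing cluster ["A", "B"] to which node "C" is being added, the hooks
  # run with pre_nodes == ["A", "B"] and post_nodes == ["A", "B", "C"]; the
  # new node only participates in the post-phase, once it is reachable.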
5747 def CheckPrereq(self):
5748 """Check prerequisites.
5751 - the new node is not already in the config
5753 - its parameters (single/dual homed) matches the cluster
5755 Any errors are signaled by raising errors.OpPrereqError.
5759 hostname = self.hostname
5760 node = hostname.name
5761 primary_ip = self.op.primary_ip = hostname.ip
5762 if self.op.secondary_ip is None:
5763 if self.primary_ip_family == netutils.IP6Address.family:
5764         raise errors.OpPrereqError("When using an IPv6 primary address, a"
5765                                    " valid IPv4 address must be given as"
5766                                    " secondary", errors.ECODE_INVAL)
5767       self.op.secondary_ip = primary_ip
5769 secondary_ip = self.op.secondary_ip
5770 if not netutils.IP4Address.IsValid(secondary_ip):
5771 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5772 " address" % secondary_ip, errors.ECODE_INVAL)
5774 node_list = cfg.GetNodeList()
5775 if not self.op.readd and node in node_list:
5776 raise errors.OpPrereqError("Node %s is already in the configuration" %
5777 node, errors.ECODE_EXISTS)
5778 elif self.op.readd and node not in node_list:
5779       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5780                                  errors.ECODE_NOENT)
5782 self.changed_primary_ip = False
5784 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5785 if self.op.readd and node == existing_node_name:
5786 if existing_node.secondary_ip != secondary_ip:
5787 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5788 " address configuration as before",
5790 if existing_node.primary_ip != primary_ip:
5791 self.changed_primary_ip = True
5795 if (existing_node.primary_ip == primary_ip or
5796 existing_node.secondary_ip == primary_ip or
5797 existing_node.primary_ip == secondary_ip or
5798 existing_node.secondary_ip == secondary_ip):
5799 raise errors.OpPrereqError("New node ip address(es) conflict with"
5800 " existing node %s" % existing_node.name,
5801 errors.ECODE_NOTUNIQUE)
5803 # After this 'if' block, None is no longer a valid value for the
5804     # _capable op attributes
5805     if self.op.readd:
5806       old_node = self.cfg.GetNodeInfo(node)
5807       assert old_node is not None, "Can't retrieve locked node %s" % node
5808       for attr in self._NFLAGS:
5809         if getattr(self.op, attr) is None:
5810           setattr(self.op, attr, getattr(old_node, attr))
5811     else:
5812       for attr in self._NFLAGS:
5813         if getattr(self.op, attr) is None:
5814           setattr(self.op, attr, True)
5816 if self.op.readd and not self.op.vm_capable:
5817       pri, sec = cfg.GetNodeInstances(node)
5818       if pri or sec:
5819         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5820                                    " flag set to false, but it already holds"
5821                                    " instances" % node,
5822                                    errors.ECODE_STATE)
5824 # check that the type of the node (single versus dual homed) is the
5825 # same as for the master
5826 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5827 master_singlehomed = myself.secondary_ip == myself.primary_ip
5828 newbie_singlehomed = secondary_ip == primary_ip
5829 if master_singlehomed != newbie_singlehomed:
5830 if master_singlehomed:
5831 raise errors.OpPrereqError("The master has no secondary ip but the"
5832 " new node has one",
5835 raise errors.OpPrereqError("The master has a secondary ip but the"
5836 " new node doesn't have one",
5839 # checks reachability
5840 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5841 raise errors.OpPrereqError("Node not reachable by ping",
5842 errors.ECODE_ENVIRON)
5844 if not newbie_singlehomed:
5845 # check reachability from my secondary ip to newbie's secondary ip
5846 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5847 source=myself.secondary_ip):
5848 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5849 " based ping to node daemon port",
5850 errors.ECODE_ENVIRON)
5852     if self.op.readd:
5853       exceptions = [node]
5854     else:
5855       exceptions = []
5857     if self.op.master_capable:
5858       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5859     else:
5860       self.master_candidate = False
5862     if self.op.readd:
5863       self.new_node = old_node
5864     else:
5865       node_group = cfg.LookupNodeGroup(self.op.group)
5866       self.new_node = objects.Node(name=node,
5867                                    primary_ip=primary_ip,
5868                                    secondary_ip=secondary_ip,
5869                                    master_candidate=self.master_candidate,
5870                                    offline=False, drained=False,
5871                                    group=node_group, ndparams={})
5873 if self.op.ndparams:
5874 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5876 if self.op.hv_state:
5877 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5879 if self.op.disk_state:
5880 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5882 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5883 # it a property on the base class.
5884 result = rpc.DnsOnlyRunner().call_version([node])[node]
5885 result.Raise("Can't get version information from node %s" % node)
5886 if constants.PROTOCOL_VERSION == result.payload:
5887 logging.info("Communication to node %s fine, sw version %s match",
5888                    node, result.payload)
5889     else:
5890       raise errors.OpPrereqError("Version mismatch master version %s,"
5891 " node version %s" %
5892 (constants.PROTOCOL_VERSION, result.payload),
5893 errors.ECODE_ENVIRON)
5895 def Exec(self, feedback_fn):
5896 """Adds the new node to the cluster.
5899 new_node = self.new_node
5900 node = new_node.name
5902     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5903       "Not owning BGL"
5905     # We are adding a new node, so we assume it's powered
5906 new_node.powered = True
5908 # for re-adds, reset the offline/drained/master-candidate flags;
5909 # we need to reset here, otherwise offline would prevent RPC calls
5910 # later in the procedure; this also means that if the re-add
5911     # fails, we are left with a non-offlined, broken node
5912     if self.op.readd:
5913       new_node.drained = new_node.offline = False # pylint: disable=W0201
5914 self.LogInfo("Readding a node, the offline/drained flags were reset")
5915 # if we demote the node, we do cleanup later in the procedure
5916 new_node.master_candidate = self.master_candidate
5917 if self.changed_primary_ip:
5918 new_node.primary_ip = self.op.primary_ip
5920 # copy the master/vm_capable flags
5921 for attr in self._NFLAGS:
5922 setattr(new_node, attr, getattr(self.op, attr))
5924 # notify the user about any possible mc promotion
5925 if new_node.master_candidate:
5926 self.LogInfo("Node will be a master candidate")
5928 if self.op.ndparams:
5929       new_node.ndparams = self.op.ndparams
5930     else:
5931       new_node.ndparams = {}
5933 if self.op.hv_state:
5934 new_node.hv_state_static = self.new_hv_state
5936 if self.op.disk_state:
5937 new_node.disk_state_static = self.new_disk_state
5939 # Add node to our /etc/hosts, and add key to known_hosts
5940 if self.cfg.GetClusterInfo().modify_etc_hosts:
5941 master_node = self.cfg.GetMasterNode()
5942 result = self.rpc.call_etc_hosts_modify(master_node,
5943                                               constants.ETC_HOSTS_ADD,
5944                                               self.hostname.name,
5945                                               self.hostname.ip)
5946 result.Raise("Can't update hosts file with new host data")
5948 if new_node.secondary_ip != new_node.primary_ip:
5949       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5950                                True)
5952 node_verify_list = [self.cfg.GetMasterNode()]
5953 node_verify_param = {
5954 constants.NV_NODELIST: ([node], {}),
5955       # TODO: do a node-net-test as well?
5956       }
5958 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5959 self.cfg.GetClusterName())
5960 for verifier in node_verify_list:
5961 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5962       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5963       if nl_payload:
5964         for failed in nl_payload:
5965 feedback_fn("ssh/hostname verification failed"
5966 " (checking from %s): %s" %
5967 (verifier, nl_payload[failed]))
5968 raise errors.OpExecError("ssh/hostname verification failed")
5970     if self.op.readd:
5971       _RedistributeAncillaryFiles(self)
5972       self.context.ReaddNode(new_node)
5973       # make sure we redistribute the config
5974       self.cfg.Update(new_node, feedback_fn)
5975       # and make sure the new node will not have old files around
5976       if not new_node.master_candidate:
5977         result = self.rpc.call_node_demote_from_mc(new_node.name)
5978         msg = result.fail_msg
5979         if msg:
5980           self.LogWarning("Node failed to demote itself from master"
5981                           " candidate status: %s" % msg)
5982     else:
5983       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5984                                   additional_vm=self.op.vm_capable)
5985       self.context.AddNode(new_node, self.proc.GetECId())
5988 class LUNodeSetParams(LogicalUnit):
5989 """Modifies the parameters of a node.
5991 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5992 to the node role (as _ROLE_*)
5993 @cvar _R2F: a dictionary from node role to tuples of flags
5994   @cvar _FLAGS: a list of attribute names corresponding to the flags
5996   """
5997 HPATH = "node-modify"
5998 HTYPE = constants.HTYPE_NODE
6000 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6001   _F2R = {
6002     (True, False, False): _ROLE_CANDIDATE,
6003     (False, True, False): _ROLE_DRAINED,
6004     (False, False, True): _ROLE_OFFLINE,
6005     (False, False, False): _ROLE_REGULAR,
6006     }
6007   _R2F = dict((v, k) for k, v in _F2R.items())
6008 _FLAGS = ["master_candidate", "drained", "offline"]
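  # Illustrative note (sketch, not part of the class): _F2R and _R2F are exact
  # inverses, so converting a flag tuple to a role and back is the identity
  # for every valid combination:
  #
  #   assert all(_R2F[_F2R[flags]] == flags for flags in _F2R)
  #
  # Mixed tuples such as (True, True, False) are deliberately absent from
  # _F2R; CheckArguments below rejects setting more than one state at a time.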
6010 def CheckArguments(self):
6011 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6012 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6013 self.op.master_capable, self.op.vm_capable,
6014                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6015                 self.op.disk_state]
6016     if all_mods.count(None) == len(all_mods):
6017       raise errors.OpPrereqError("Please pass at least one modification",
6018                                  errors.ECODE_INVAL)
6019     if all_mods.count(True) > 1:
6020       raise errors.OpPrereqError("Can't set the node into more than one"
6021                                  " state at the same time",
6022                                  errors.ECODE_INVAL)
6024 # Boolean value that tells us whether we might be demoting from MC
6025 self.might_demote = (self.op.master_candidate is False or
6026 self.op.offline is True or
6027 self.op.drained is True or
6028 self.op.master_capable is False)
6030 if self.op.secondary_ip:
6031 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6032 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6033 " address" % self.op.secondary_ip,
6036 self.lock_all = self.op.auto_promote and self.might_demote
6037 self.lock_instances = self.op.secondary_ip is not None
6039 def _InstanceFilter(self, instance):
6040 """Filter for getting affected instances.
6043 return (instance.disk_template in constants.DTS_INT_MIRROR and
6044 self.op.node_name in instance.all_nodes)
6046 def ExpandNames(self):
6047     if self.lock_all:
6048       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
6049     else:
6050       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
6052 # Since modifying a node can have severe effects on currently running
6053 # operations the resource lock is at least acquired in shared mode
6054 self.needed_locks[locking.LEVEL_NODE_RES] = \
6055 self.needed_locks[locking.LEVEL_NODE]
6057 # Get node resource and instance locks in shared mode; they are not used
6058 # for anything but read-only access
6059 self.share_locks[locking.LEVEL_NODE_RES] = 1
6060 self.share_locks[locking.LEVEL_INSTANCE] = 1
6062 if self.lock_instances:
6063 self.needed_locks[locking.LEVEL_INSTANCE] = \
6064 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
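  # Illustrative note (sketch, not part of the LU): _InstanceFilter is an
  # ordinary predicate, so the lock set above is simply "all internally
  # mirrored instances that use this node". A standalone equivalent:
  #
  #   def uses_node(instance, node_name):
  #     return (instance.disk_template in constants.DTS_INT_MIRROR and
  #             node_name in instance.all_nodes)
  #
  # Only these instances can be affected by a secondary IP change, which is
  # why instance locks are needed at all (see lock_instances above).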
6066   def BuildHooksEnv(self):
6067     """Build hooks env.
6069     This runs on the master node.
6071     """
6072     return {
6073       "OP_TARGET": self.op.node_name,
6074       "MASTER_CANDIDATE": str(self.op.master_candidate),
6075       "OFFLINE": str(self.op.offline),
6076       "DRAINED": str(self.op.drained),
6077       "MASTER_CAPABLE": str(self.op.master_capable),
6078       "VM_CAPABLE": str(self.op.vm_capable),
6079       }
6081 def BuildHooksNodes(self):
6082 """Build hooks nodes.
6085     nl = [self.cfg.GetMasterNode(), self.op.node_name]
6087     return (nl, nl)
6088 def CheckPrereq(self):
6089 """Check prerequisites.
6091 This only checks the instance list against the existing names.
6094 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6096 if self.lock_instances:
6097 affected_instances = \
6098 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6100 # Verify instance locks
6101 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6102 wanted_instances = frozenset(affected_instances.keys())
6103 if wanted_instances - owned_instances:
6104 raise errors.OpPrereqError("Instances affected by changing node %s's"
6105 " secondary IP address have changed since"
6106 " locks were acquired, wanted '%s', have"
6107 " '%s'; retry the operation" %
6109 utils.CommaJoin(wanted_instances),
6110 utils.CommaJoin(owned_instances)),
6113 affected_instances = None
6115 if (self.op.master_candidate is not None or
6116 self.op.drained is not None or
6117 self.op.offline is not None):
6118 # we can't change the master's node flags
6119 if self.op.node_name == self.cfg.GetMasterNode():
6120 raise errors.OpPrereqError("The master role can be changed"
6121 " only via master-failover",
6124 if self.op.master_candidate and not node.master_capable:
6125 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6126 " it a master candidate" % node.name,
6129 if self.op.vm_capable is False:
6130       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6131       if ipri or isec:
6132         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6133                                    " the vm_capable flag" % node.name,
6134                                    errors.ECODE_STATE)
6136 if node.master_candidate and self.might_demote and not self.lock_all:
6137 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6138       # check if after removing the current node, we're missing master
6139       # candidates
6140 (mc_remaining, mc_should, _) = \
6141 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6142 if mc_remaining < mc_should:
6143 raise errors.OpPrereqError("Not enough master candidates, please"
6144 " pass auto promote option to allow"
6145 " promotion (--auto-promote or RAPI"
6146 " auto_promote=True)", errors.ECODE_STATE)
6148 self.old_flags = old_flags = (node.master_candidate,
6149 node.drained, node.offline)
6150 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6151 self.old_role = old_role = self._F2R[old_flags]
6153 # Check for ineffective changes
6154 for attr in self._FLAGS:
6155 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6156 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6157 setattr(self.op, attr, None)
6159 # Past this point, any flag change to False means a transition
6160 # away from the respective state, as only real changes are kept
6162 # TODO: We might query the real power state if it supports OOB
6163 if _SupportsOob(self.cfg, node):
6164 if self.op.offline is False and not (node.powered or
6165 self.op.powered is True):
6166 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6167 " offline status can be reset") %
6168 self.op.node_name, errors.ECODE_STATE)
6169 elif self.op.powered is not None:
6170 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6171 " as it does not support out-of-band"
6172 " handling") % self.op.node_name,
6175 # If we're being deofflined/drained, we'll MC ourself if needed
6176 if (self.op.drained is False or self.op.offline is False or
6177 (self.op.master_capable and not node.master_capable)):
6178 if _DecideSelfPromotion(self):
6179 self.op.master_candidate = True
6180 self.LogInfo("Auto-promoting node to master candidate")
6182 # If we're no longer master capable, we'll demote ourselves from MC
6183 if self.op.master_capable is False and node.master_candidate:
6184 self.LogInfo("Demoting from master candidate")
6185 self.op.master_candidate = False
6188 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6189 if self.op.master_candidate:
6190 new_role = self._ROLE_CANDIDATE
6191 elif self.op.drained:
6192 new_role = self._ROLE_DRAINED
6193 elif self.op.offline:
6194 new_role = self._ROLE_OFFLINE
6195 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6196       # False is still in new flags, which means we're un-setting (the
6197       # offline/drained) state
6198       new_role = self._ROLE_REGULAR
6199     else: # no new flags, nothing, keep old role
6200       new_role = old_role
6202 self.new_role = new_role
6204 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6205 # Trying to transition out of offline status
6206       result = self.rpc.call_version([node.name])[node.name]
6207       if result.fail_msg:
6208         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6209                                    " to report its version: %s" %
6210                                    (node.name, result.fail_msg),
6211                                    errors.ECODE_STATE)
6212       else:
6213         self.LogWarning("Transitioning node from offline to online state"
6214                         " without using re-add. Please make sure the node"
6215                         " is healthy!")
6217 # When changing the secondary ip, verify if this is a single-homed to
6218     # multi-homed transition or vice versa, and apply the relevant
6219     # restrictions.
6220 if self.op.secondary_ip:
6221 # Ok even without locking, because this can't be changed by any LU
6222 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6223 master_singlehomed = master.secondary_ip == master.primary_ip
6224 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6225 if self.op.force and node.name == master.name:
6226 self.LogWarning("Transitioning from single-homed to multi-homed"
6227 " cluster; all nodes will require a secondary IP"
6230 raise errors.OpPrereqError("Changing the secondary ip on a"
6231 " single-homed cluster requires the"
6232 " --force option to be passed, and the"
6233 " target node to be the master",
6235 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6236 if self.op.force and node.name == master.name:
6237 self.LogWarning("Transitioning from multi-homed to single-homed"
6238 " cluster; secondary IP addresses will have to be"
6241 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6242 " same as the primary IP on a multi-homed"
6243 " cluster, unless the --force option is"
6244 " passed, and the target node is the"
6245 " master", errors.ECODE_INVAL)
6247 assert not (frozenset(affected_instances) -
6248                   self.owned_locks(locking.LEVEL_INSTANCE))
6250       if node.offline:
6251         if affected_instances:
6252 msg = ("Cannot change secondary IP address: offline node has"
6253 " instances (%s) configured to use it" %
6254 utils.CommaJoin(affected_instances.keys()))
6255           raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6256       else:
6257 # On online nodes, check that no instances are running, and that
6258 # the node has the new ip and we can reach it.
6259 for instance in affected_instances.values():
6260 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6261 msg="cannot change secondary ip")
6263 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6264 if master.name != node.name:
6265 # check reachability from master secondary ip to new secondary ip
6266 if not netutils.TcpPing(self.op.secondary_ip,
6267 constants.DEFAULT_NODED_PORT,
6268 source=master.secondary_ip):
6269 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6270 " based ping to node daemon port",
6271 errors.ECODE_ENVIRON)
6273 if self.op.ndparams:
6274 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6275 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6276 self.new_ndparams = new_ndparams
6278 if self.op.hv_state:
6279 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6280 self.node.hv_state_static)
6282 if self.op.disk_state:
6283 self.new_disk_state = \
6284 _MergeAndVerifyDiskState(self.op.disk_state,
6285 self.node.disk_state_static)
6287   def Exec(self, feedback_fn):
6288     """Modifies a node.
6290     """
6291     node = self.node
6292     old_role = self.old_role
6293     new_role = self.new_role
6295     result = []
6297 if self.op.ndparams:
6298 node.ndparams = self.new_ndparams
6300 if self.op.powered is not None:
6301 node.powered = self.op.powered
6303 if self.op.hv_state:
6304 node.hv_state_static = self.new_hv_state
6306 if self.op.disk_state:
6307 node.disk_state_static = self.new_disk_state
6309 for attr in ["master_capable", "vm_capable"]:
6310       val = getattr(self.op, attr)
6311       if val is not None:
6312         setattr(node, attr, val)
6313 result.append((attr, str(val)))
6315 if new_role != old_role:
6316 # Tell the node to demote itself, if no longer MC and not offline
6317 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6318         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6319         if msg:
6320           self.LogWarning("Node failed to demote itself: %s", msg)
6322 new_flags = self._R2F[new_role]
6323       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6324         if of != nf:
6325           result.append((desc, str(nf)))
6326 (node.master_candidate, node.drained, node.offline) = new_flags
6328       # we locked all nodes, we adjust the CP before updating this node
6329       if self.lock_all:
6330         _AdjustCandidatePool(self, [node.name])
6332 if self.op.secondary_ip:
6333 node.secondary_ip = self.op.secondary_ip
6334 result.append(("secondary_ip", self.op.secondary_ip))
6336 # this will trigger configuration file update, if needed
6337 self.cfg.Update(node, feedback_fn)
6339     # this will trigger job queue propagation or cleanup if the mc
6340     # flag changed
6341     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6342       self.context.ReaddNode(node)
6344     return result
6347 class LUNodePowercycle(NoHooksLU):
6348 """Powercycles a node.
6353 def CheckArguments(self):
6354 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6355 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6356 raise errors.OpPrereqError("The node is the master and the force"
6357 " parameter was not set",
6360 def ExpandNames(self):
6361 """Locking for PowercycleNode.
6363 This is a last-resort option and shouldn't block on other
6364 jobs. Therefore, we grab no locks.
6367 self.needed_locks = {}
6369 def Exec(self, feedback_fn):
6373 result = self.rpc.call_node_powercycle(self.op.node_name,
6374 self.cfg.GetHypervisorType())
6375 result.Raise("Failed to schedule the reboot")
6376 return result.payload
6379 class LUClusterQuery(NoHooksLU):
6380 """Query cluster configuration.
6385 def ExpandNames(self):
6386 self.needed_locks = {}
6388 def Exec(self, feedback_fn):
6389 """Return cluster config.
6392     cluster = self.cfg.GetClusterInfo()
6393     os_hvp = {}
6395     # Filter just for enabled hypervisors
6396 for os_name, hv_dict in cluster.os_hvp.items():
6397 os_hvp[os_name] = {}
6398 for hv_name, hv_params in hv_dict.items():
6399 if hv_name in cluster.enabled_hypervisors:
6400 os_hvp[os_name][hv_name] = hv_params
6402 # Convert ip_family to ip_version
6403 primary_ip_version = constants.IP4_VERSION
6404 if cluster.primary_ip_family == netutils.IP6Address.family:
6405 primary_ip_version = constants.IP6_VERSION
6408 "software_version": constants.RELEASE_VERSION,
6409 "protocol_version": constants.PROTOCOL_VERSION,
6410 "config_version": constants.CONFIG_VERSION,
6411 "os_api_version": max(constants.OS_API_VERSIONS),
6412 "export_version": constants.EXPORT_VERSION,
6413 "architecture": runtime.GetArchInfo(),
6414 "name": cluster.cluster_name,
6415 "master": cluster.master_node,
6416 "default_hypervisor": cluster.primary_hypervisor,
6417 "enabled_hypervisors": cluster.enabled_hypervisors,
6418 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6419                         for hypervisor_name in cluster.enabled_hypervisors]),
6420       "os_hvp": os_hvp,
6421 "beparams": cluster.beparams,
6422 "osparams": cluster.osparams,
6423 "ipolicy": cluster.ipolicy,
6424 "nicparams": cluster.nicparams,
6425 "ndparams": cluster.ndparams,
6426 "diskparams": cluster.diskparams,
6427 "candidate_pool_size": cluster.candidate_pool_size,
6428 "master_netdev": cluster.master_netdev,
6429 "master_netmask": cluster.master_netmask,
6430 "use_external_mip_script": cluster.use_external_mip_script,
6431 "volume_group_name": cluster.volume_group_name,
6432 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6433 "file_storage_dir": cluster.file_storage_dir,
6434 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6435 "maintain_node_health": cluster.maintain_node_health,
6436 "ctime": cluster.ctime,
6437 "mtime": cluster.mtime,
6438 "uuid": cluster.uuid,
6439 "tags": list(cluster.GetTags()),
6440 "uid_pool": cluster.uid_pool,
6441 "default_iallocator": cluster.default_iallocator,
6442 "reserved_lvs": cluster.reserved_lvs,
6443 "primary_ip_version": primary_ip_version,
6444 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6445 "hidden_os": cluster.hidden_os,
6446 "blacklisted_os": cluster.blacklisted_os,
6452 class LUClusterConfigQuery(NoHooksLU):
6453 """Return configuration values.
6458 def CheckArguments(self):
6459 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6461 def ExpandNames(self):
6462 self.cq.ExpandNames(self)
6464 def DeclareLocks(self, level):
6465 self.cq.DeclareLocks(self, level)
6467 def Exec(self, feedback_fn):
6468 result = self.cq.OldStyleQuery(self)
6470     assert len(result) == 1
6472     return result[0]
6475 class _ClusterQuery(_QueryBase):
6476 FIELDS = query.CLUSTER_FIELDS
6478   #: Do not sort (there is only one item)
6479   SORT_FIELD = None
6481 def ExpandNames(self, lu):
6482 lu.needed_locks = {}
6484 # The following variables interact with _QueryBase._GetNames
6485 self.wanted = locking.ALL_SET
6486     self.do_locking = self.use_locking
6488     if self.do_locking:
6489       raise errors.OpPrereqError("Can not use locking for cluster queries",
6490                                  errors.ECODE_INVAL)
6492   def DeclareLocks(self, lu, level):
6493     pass
6495 def _GetQueryData(self, lu):
6496 """Computes the list of nodes and their attributes.
6499 # Locking is not used
6500 assert not (compat.any(lu.glm.is_owned(level)
6501 for level in locking.LEVELS
6502 if level != locking.LEVEL_CLUSTER) or
6503 self.do_locking or self.use_locking)
6505 if query.CQ_CONFIG in self.requested_data:
6506       cluster = lu.cfg.GetClusterInfo()
6507     else:
6508       cluster = NotImplemented
6510 if query.CQ_QUEUE_DRAINED in self.requested_data:
6511       drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6512     else:
6513       drain_flag = NotImplemented
6515 if query.CQ_WATCHER_PAUSE in self.requested_data:
6516       watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6517     else:
6518       watcher_pause = NotImplemented
6520 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
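# Illustrative sketch (standalone, not part of the module): the gathering
# pattern used by _ClusterQuery above - compute each piece of data only when
# it was requested and return NotImplemented otherwise - in its minimal form:
def _ExampleGatherRequested(requested, producers):
  """Run only the producer callbacks whose key was requested.

  @type requested: set
  @param requested: keys of the requested data items
  @type producers: dict
  @param producers: hypothetical map of key to zero-argument callable

  """
  return dict((key, producer() if key in requested else NotImplemented)
              for key, producer in producers.items())

# _ExampleGatherRequested(set(["a"]), {"a": lambda: 1, "b": lambda: 2})
# returns {"a": 1, "b": NotImplemented}: unrequested data is never computed.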
6523 class LUInstanceActivateDisks(NoHooksLU):
6524 """Bring up an instance's disks.
6529 def ExpandNames(self):
6530 self._ExpandAndLockInstance()
6531 self.needed_locks[locking.LEVEL_NODE] = []
6532 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6534 def DeclareLocks(self, level):
6535 if level == locking.LEVEL_NODE:
6536 self._LockInstancesNodes()
6538 def CheckPrereq(self):
6539 """Check prerequisites.
6541 This checks that the instance is in the cluster.
6544 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6545 assert self.instance is not None, \
6546 "Cannot retrieve locked instance %s" % self.op.instance_name
6547 _CheckNodeOnline(self, self.instance.primary_node)
6549 def Exec(self, feedback_fn):
6550 """Activate the disks.
6553 disks_ok, disks_info = \
6554 _AssembleInstanceDisks(self, self.instance,
6555                              ignore_size=self.op.ignore_size)
6556     if not disks_ok:
6557       raise errors.OpExecError("Cannot activate block devices")
6559 if self.op.wait_for_sync:
6560 if not _WaitForSync(self, self.instance):
6561 raise errors.OpExecError("Some disks of the instance are degraded!")
6566 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6567                            ignore_size=False):
6568 """Prepare the block devices for an instance.
6570 This sets up the block devices on all nodes.
6572 @type lu: L{LogicalUnit}
6573 @param lu: the logical unit on whose behalf we execute
6574 @type instance: L{objects.Instance}
6575 @param instance: the instance for whose disks we assemble
6576 @type disks: list of L{objects.Disk} or None
6577 @param disks: which disks to assemble (or all, if None)
6578 @type ignore_secondaries: boolean
6579 @param ignore_secondaries: if true, errors on secondary nodes
6580 won't result in an error return from the function
6581 @type ignore_size: boolean
6582 @param ignore_size: if true, the current known size of the disk
6583 will not be used during the disk activation, useful for cases
6584 when the size is wrong
6585 @return: False if the operation failed, otherwise a list of
6586 (host, instance_visible_name, node_visible_name)
6587       with the mapping from node devices to instance devices
6589   """
6590   device_info = []
6591   disks_ok = True
6592 iname = instance.name
6593 disks = _ExpandCheckDisks(instance, disks)
6595 # With the two passes mechanism we try to reduce the window of
6596 # opportunity for the race condition of switching DRBD to primary
6597   # before handshaking occurred, but we do not eliminate it
6599 # The proper fix would be to wait (with some limits) until the
6600 # connection has been made and drbd transitions from WFConnection
6601   # into any other network-connected state (Connected, SyncTarget,
6602   # SyncSource, etc.)
6604 # 1st pass, assemble on all nodes in secondary mode
6605 for idx, inst_disk in enumerate(disks):
6606     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6607       if ignore_size:
6608         node_disk = node_disk.Copy()
6609         node_disk.UnsetSize()
6610       lu.cfg.SetDiskID(node_disk, node)
6611       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance),
6612                                              iname, False, idx)
6613       msg = result.fail_msg
6614       if msg:
6615         is_offline_secondary = (node in instance.secondary_nodes and
6616                                 result.offline)
6617         lu.LogWarning("Could not prepare block device %s on node %s"
6618                       " (is_primary=False, pass=1): %s",
6619                       inst_disk.iv_name, node, msg)
6620         if not (ignore_secondaries or is_offline_secondary):
6621           disks_ok = False
6623 # FIXME: race condition on drbd migration to primary
6625 # 2nd pass, do only the primary node
6626   for idx, inst_disk in enumerate(disks):
6627     dev_path = None
6629     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6630       if node != instance.primary_node:
6631         continue
6632       if ignore_size:
6633         node_disk = node_disk.Copy()
6634         node_disk.UnsetSize()
6635       lu.cfg.SetDiskID(node_disk, node)
6636       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance),
6637                                              iname, True, idx)
6638       msg = result.fail_msg
6639       if msg:
6640         lu.LogWarning("Could not prepare block device %s on node %s"
6641                       " (is_primary=True, pass=2): %s",
6642                       inst_disk.iv_name, node, msg)
6643         disks_ok = False
6644       else:
6645         dev_path = result.payload
6647 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6649 # leave the disks configured for the primary node
6650 # this is a workaround that would be fixed better by
6651   # improving the logical/physical id handling
6652   for disk in disks:
6653     lu.cfg.SetDiskID(disk, instance.primary_node)
6655 return disks_ok, device_info
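# Illustrative sketch (standalone, not part of the module): the two-pass
# scheme above in miniature. Pass 1 assembles every disk on every node in
# secondary mode; pass 2 re-assembles on the primary node only, as primary.
# This narrows (but, per the comment above, does not close) the window in
# which DRBD could become primary before the peers have handshaked.
def _ExampleTwoPassAssemble(nodes, primary, assemble_fn):
  """Call assemble_fn(node, as_primary) in the order used above.

  @param assemble_fn: hypothetical callback standing in for the
      call_blockdev_assemble RPC

  """
  for node in nodes:
    assemble_fn(node, False)   # 1st pass: everyone in secondary role
  assemble_fn(primary, True)   # 2nd pass: only the primary node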
6658 def _StartInstanceDisks(lu, instance, force):
6659 """Start the disks of an instance.
6662 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6663                                        ignore_secondaries=force)
6664   if not disks_ok:
6665     _ShutdownInstanceDisks(lu, instance)
6666     if force is not None and not force:
6667       lu.LogWarning("",
6668                     hint=("If the message above refers to a secondary node,"
6669 " you can retry the operation using '--force'"))
6670 raise errors.OpExecError("Disk consistency error")
6673 class LUInstanceDeactivateDisks(NoHooksLU):
6674 """Shutdown an instance's disks.
6679 def ExpandNames(self):
6680 self._ExpandAndLockInstance()
6681 self.needed_locks[locking.LEVEL_NODE] = []
6682 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6684 def DeclareLocks(self, level):
6685 if level == locking.LEVEL_NODE:
6686 self._LockInstancesNodes()
6688 def CheckPrereq(self):
6689 """Check prerequisites.
6691 This checks that the instance is in the cluster.
6694 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6695 assert self.instance is not None, \
6696 "Cannot retrieve locked instance %s" % self.op.instance_name
6698 def Exec(self, feedback_fn):
6699 """Deactivate the disks
6702     instance = self.instance
6703     if self.op.force:
6704       _ShutdownInstanceDisks(self, instance)
6705     else:
6706       _SafeShutdownInstanceDisks(self, instance)
6709 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6710 """Shutdown block devices of an instance.
6712 This function checks if an instance is running, before calling
6713 _ShutdownInstanceDisks.
6716 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6717 _ShutdownInstanceDisks(lu, instance, disks=disks)
6720 def _ExpandCheckDisks(instance, disks):
6721 """Return the instance disks selected by the disks list
6723 @type disks: list of L{objects.Disk} or None
6724 @param disks: selected disks
6725 @rtype: list of L{objects.Disk}
6726   @return: selected instance disks to act on
6728   """
6729   if disks is None:
6730     return instance.disks
6731   else:
6732     if not set(disks).issubset(instance.disks):
6733       raise errors.ProgrammerError("Can only act on disks belonging to the"
6734                                    " target instance")
6735     return disks
6738 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6739 """Shutdown block devices of an instance.
6741 This does the shutdown on all nodes of the instance.
6743   If the ignore_primary is false, errors on the primary node are
6744   ignored.
6746   """
6747   all_result = True
6748   disks = _ExpandCheckDisks(instance, disks)
6750   for disk in disks:
6751     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6752 lu.cfg.SetDiskID(top_disk, node)
6753 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6754       msg = result.fail_msg
6755       if msg:
6756         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6757                       disk.iv_name, node, msg)
6758         if ((node == instance.primary_node and not ignore_primary) or
6759             (node != instance.primary_node and not result.offline)):
6760           all_result = False
6761   return all_result
6764 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6765 """Checks if a node has enough free memory.
6767   This function checks if a given node has the needed amount of free
6768   memory. In case the node has less memory or we cannot get the
6769   information from the node, this function raises an OpPrereqError
6770   exception.
6772 @type lu: C{LogicalUnit}
6773 @param lu: a logical unit from which we get configuration data
6774   @type node: C{str}
6775   @param node: the node to check
6776 @type reason: C{str}
6777 @param reason: string to use in the error message
6778 @type requested: C{int}
6779 @param requested: the amount of memory in MiB to check for
6780 @type hypervisor_name: C{str}
6781 @param hypervisor_name: the hypervisor to ask for memory stats
6783 @return: node current free memory
6784 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6785       we cannot check the node
6787   """
6788 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6789 nodeinfo[node].Raise("Can't get data from node %s" % node,
6790 prereq=True, ecode=errors.ECODE_ENVIRON)
6791 (_, _, (hv_info, )) = nodeinfo[node].payload
6793 free_mem = hv_info.get("memory_free", None)
6794 if not isinstance(free_mem, int):
6795 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6796 " was '%s'" % (node, free_mem),
6797 errors.ECODE_ENVIRON)
6798 if requested > free_mem:
6799 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6800 " needed %s MiB, available %s MiB" %
6801                                (node, reason, requested, free_mem),
6802                                errors.ECODE_NORES)
6803   return free_mem
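# Illustrative usage sketch (hypothetical call site, not part of the module):
# an LU that is about to start an instance checks the primary node first and
# may use the returned value, e.g.:
#
#   free = _CheckNodeFreeMemory(self, instance.primary_node,
#                               "starting instance %s" % instance.name,
#                               bep[constants.BE_MINMEM], instance.hypervisor)
#
# On success "free" is the node's current free memory in MiB; on failure the
# OpPrereqError carries ECODE_ENVIRON (unreachable node or bogus data) or
# ECODE_NORES (not enough memory), so callers need no extra error handling.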
6806 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6807 """Checks if nodes have enough free disk space in the all VGs.
6809 This function check if all given nodes have the needed amount of
6810 free disk. In case any node has less disk or we cannot get the
6811 information from the node, this function raise an OpPrereqError
6814 @type lu: C{LogicalUnit}
6815 @param lu: a logical unit from which we get configuration data
6816 @type nodenames: C{list}
6817 @param nodenames: the list of node names to check
6818 @type req_sizes: C{dict}
6819   @param req_sizes: the hash of vg and corresponding amount of disk in
6820       MiB to check for
6821 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6822       or we cannot check the node
6824   """
6825 for vg, req_size in req_sizes.items():
6826 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
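# Illustrative usage sketch (hypothetical sizes, not part of the module): for
# a DRBD disk keeping data in "xenvg" and metadata in "metavg", a caller
# would aggregate per-VG requirements and check them in one call:
#
#   req_sizes = {"xenvg": 10240, "metavg": 128}   # MiB per volume group
#   _CheckNodesFreeDiskPerVG(self, [pnode, snode], req_sizes)
#
# Each volume group is verified independently on every node, so one exhausted
# VG on one node is enough to fail the prerequisite check.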
6829 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6830 """Checks if nodes have enough free disk space in the specified VG.
6832   This function checks if all given nodes have the needed amount of
6833   free disk. In case any node has less disk or we cannot get the
6834   information from the node, this function raises an OpPrereqError
6835   exception.
6837 @type lu: C{LogicalUnit}
6838 @param lu: a logical unit from which we get configuration data
6839 @type nodenames: C{list}
6840 @param nodenames: the list of node names to check
6841   @type vg: C{str}
6842   @param vg: the volume group to check
6843 @type requested: C{int}
6844 @param requested: the amount of disk in MiB to check for
6845 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6846       or we cannot check the node
6848   """
6849 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6850 for node in nodenames:
6851 info = nodeinfo[node]
6852 info.Raise("Cannot get current information from node %s" % node,
6853 prereq=True, ecode=errors.ECODE_ENVIRON)
6854 (_, (vg_info, ), _) = info.payload
6855 vg_free = vg_info.get("vg_free", None)
6856 if not isinstance(vg_free, int):
6857 raise errors.OpPrereqError("Can't compute free disk space on node"
6858 " %s for vg %s, result was '%s'" %
6859 (node, vg, vg_free), errors.ECODE_ENVIRON)
6860 if requested > vg_free:
6861 raise errors.OpPrereqError("Not enough disk space on target node %s"
6862 " vg %s: required %d MiB, available %d MiB" %
6863                                  (node, vg, requested, vg_free),
6864                                  errors.ECODE_NORES)
6867 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6868 """Checks if nodes have enough physical CPUs
6870 This function checks if all given nodes have the needed number of
6871 physical CPUs. In case any node has less CPUs or we cannot get the
6872   information from the node, this function raises an OpPrereqError
6873   exception.
6875 @type lu: C{LogicalUnit}
6876 @param lu: a logical unit from which we get configuration data
6877 @type nodenames: C{list}
6878 @param nodenames: the list of node names to check
6879 @type requested: C{int}
6880 @param requested: the minimum acceptable number of physical CPUs
6881 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6882       or we cannot check the node
6884   """
6885 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6886 for node in nodenames:
6887 info = nodeinfo[node]
6888 info.Raise("Cannot get current information from node %s" % node,
6889 prereq=True, ecode=errors.ECODE_ENVIRON)
6890 (_, _, (hv_info, )) = info.payload
6891 num_cpus = hv_info.get("cpu_total", None)
6892 if not isinstance(num_cpus, int):
6893 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6894 " on node %s, result was '%s'" %
6895 (node, num_cpus), errors.ECODE_ENVIRON)
6896 if requested > num_cpus:
6897 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6898 "required" % (node, num_cpus, requested),
6902 class LUInstanceStartup(LogicalUnit):
6903 """Starts an instance.
6906 HPATH = "instance-start"
6907   HTYPE = constants.HTYPE_INSTANCE
6908   REQ_BGL = False
6910 def CheckArguments(self):
6912 if self.op.beparams:
6913 # fill the beparams dict
6914 objects.UpgradeBeParams(self.op.beparams)
6915 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6917 def ExpandNames(self):
6918 self._ExpandAndLockInstance()
6919 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6921 def DeclareLocks(self, level):
6922 if level == locking.LEVEL_NODE_RES:
6923 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6925 def BuildHooksEnv(self):
6928     This runs on master, primary and secondary nodes of the instance.
6930     """
6931     env = {
6932       "FORCE": self.op.force,
6933       }
6935     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6937     return env
6939 def BuildHooksNodes(self):
6940 """Build hooks nodes.
6943     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6945     return (nl, nl)
6946 def CheckPrereq(self):
6947 """Check prerequisites.
6949 This checks that the instance is in the cluster.
6952 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6953 assert self.instance is not None, \
6954 "Cannot retrieve locked instance %s" % self.op.instance_name
6957 if self.op.hvparams:
6958 # check hypervisor parameter syntax (locally)
6959 cluster = self.cfg.GetClusterInfo()
6960 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6961 filled_hvp = cluster.FillHV(instance)
6962 filled_hvp.update(self.op.hvparams)
6963 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6964 hv_type.CheckParameterSyntax(filled_hvp)
6965 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6967 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6969 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6971 if self.primary_offline and self.op.ignore_offline_nodes:
6972 self.LogWarning("Ignoring offline primary node")
6974       if self.op.hvparams or self.op.beparams:
6975         self.LogWarning("Overridden parameters are ignored")
6976     else:
6977       _CheckNodeOnline(self, instance.primary_node)
6979 bep = self.cfg.GetClusterInfo().FillBE(instance)
6980 bep.update(self.op.beparams)
6982 # check bridges existence
6983 _CheckInstanceBridgesExist(self, instance)
6985 remote_info = self.rpc.call_instance_info(instance.primary_node,
6986                                               instance.name,
6987                                               instance.hypervisor)
6988 remote_info.Raise("Error checking node %s" % instance.primary_node,
6989 prereq=True, ecode=errors.ECODE_ENVIRON)
6990 if not remote_info.payload: # not running already
6991 _CheckNodeFreeMemory(self, instance.primary_node,
6992 "starting instance %s" % instance.name,
6993 bep[constants.BE_MINMEM], instance.hypervisor)
6995 def Exec(self, feedback_fn):
6996 """Start the instance.
6999 instance = self.instance
7000 force = self.op.force
7002 if not self.op.no_remember:
7003 self.cfg.MarkInstanceUp(instance.name)
7005 if self.primary_offline:
7006 assert self.op.ignore_offline_nodes
7007       self.LogInfo("Primary node offline, marked instance as started")
7008     else:
7009       node_current = instance.primary_node
7011 _StartInstanceDisks(self, instance, force)
7013       result = \
7014         self.rpc.call_instance_start(node_current,
7015                                      (instance, self.op.hvparams,
7016                                       self.op.beparams),
7017                                      self.op.startup_paused)
7018       msg = result.fail_msg
7019       if msg:
7020         _ShutdownInstanceDisks(self, instance)
7021         raise errors.OpExecError("Could not start instance: %s" % msg)
7024 class LUInstanceReboot(LogicalUnit):
7025 """Reboot an instance.
7028 HPATH = "instance-reboot"
7029   HTYPE = constants.HTYPE_INSTANCE
7030   REQ_BGL = False
7032 def ExpandNames(self):
7033 self._ExpandAndLockInstance()
7035 def BuildHooksEnv(self):
7038     This runs on master, primary and secondary nodes of the instance.
7040     """
7041     env = {
7042       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7043 "REBOOT_TYPE": self.op.reboot_type,
7044 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7045       }
7047     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7049     return env
7051 def BuildHooksNodes(self):
7052 """Build hooks nodes.
7055     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7057     return (nl, nl)
7058 def CheckPrereq(self):
7059 """Check prerequisites.
7061 This checks that the instance is in the cluster.
7064 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7065 assert self.instance is not None, \
7066 "Cannot retrieve locked instance %s" % self.op.instance_name
7067 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7068 _CheckNodeOnline(self, instance.primary_node)
7070 # check bridges existence
7071 _CheckInstanceBridgesExist(self, instance)
7073 def Exec(self, feedback_fn):
7074 """Reboot the instance.
7077 instance = self.instance
7078 ignore_secondaries = self.op.ignore_secondaries
7079 reboot_type = self.op.reboot_type
7081 remote_info = self.rpc.call_instance_info(instance.primary_node,
7082                                               instance.name,
7083                                               instance.hypervisor)
7084 remote_info.Raise("Error checking node %s" % instance.primary_node)
7085 instance_running = bool(remote_info.payload)
7087 node_current = instance.primary_node
7089 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7090 constants.INSTANCE_REBOOT_HARD]:
7091 for disk in instance.disks:
7092 self.cfg.SetDiskID(disk, node_current)
7093 result = self.rpc.call_instance_reboot(node_current, instance,
7094                                              reboot_type,
7095                                              self.op.shutdown_timeout)
7096 result.Raise("Could not reboot instance")
7097     else:
7098       if instance_running:
7099 result = self.rpc.call_instance_shutdown(node_current, instance,
7100 self.op.shutdown_timeout)
7101 result.Raise("Could not shutdown instance for full reboot")
7102 _ShutdownInstanceDisks(self, instance)
7103       else:
7104         self.LogInfo("Instance %s was already stopped, starting now",
7105                      instance.name)
7106       _StartInstanceDisks(self, instance, ignore_secondaries)
7107 result = self.rpc.call_instance_start(node_current,
7108 (instance, None, None), False)
7109       msg = result.fail_msg
7110       if msg:
7111         _ShutdownInstanceDisks(self, instance)
7112 raise errors.OpExecError("Could not start instance for"
7113 " full reboot: %s" % msg)
7115 self.cfg.MarkInstanceUp(instance.name)
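# Illustrative sketch (standalone, not part of the module): the decision made
# in Exec above, reduced to a pure function. Soft and hard reboots are
# delegated to the hypervisor only for a running instance; everything else
# (a full reboot, or rebooting a stopped instance) becomes an optional
# shutdown followed by fresh disk activation and start.
def _ExampleRebootPlan(instance_running, reboot_type, hv_reboot_types):
  """Return the list of steps a requested reboot decomposes into.

  @param hv_reboot_types: hypothetical stand-in for the set of
      INSTANCE_REBOOT_SOFT/INSTANCE_REBOOT_HARD constants used above

  """
  if instance_running and reboot_type in hv_reboot_types:
    return ["hypervisor_reboot"]
  steps = []
  if instance_running:
    steps.append("shutdown")
  return steps + ["start_disks", "start_instance"]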
7118 class LUInstanceShutdown(LogicalUnit):
7119 """Shutdown an instance.
7122 HPATH = "instance-stop"
7123   HTYPE = constants.HTYPE_INSTANCE
7124   REQ_BGL = False
7126 def ExpandNames(self):
7127 self._ExpandAndLockInstance()
7129 def BuildHooksEnv(self):
7132 This runs on master, primary and secondary nodes of the instance.
7135 env = _BuildInstanceHookEnvByObject(self, self.instance)
7136 env["TIMEOUT"] = self.op.timeout
7139 def BuildHooksNodes(self):
7140 """Build hooks nodes.
7143     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7145     return (nl, nl)
7146 def CheckPrereq(self):
7147 """Check prerequisites.
7149 This checks that the instance is in the cluster.
7152 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7153 assert self.instance is not None, \
7154 "Cannot retrieve locked instance %s" % self.op.instance_name
7156 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7158 self.primary_offline = \
7159 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7161 if self.primary_offline and self.op.ignore_offline_nodes:
7162       self.LogWarning("Ignoring offline primary node")
7163     else:
7164       _CheckNodeOnline(self, self.instance.primary_node)
7166 def Exec(self, feedback_fn):
7167 """Shutdown the instance.
7170 instance = self.instance
7171 node_current = instance.primary_node
7172 timeout = self.op.timeout
7174 if not self.op.no_remember:
7175 self.cfg.MarkInstanceDown(instance.name)
7177 if self.primary_offline:
7178 assert self.op.ignore_offline_nodes
7179       self.LogInfo("Primary node offline, marked instance as stopped")
7180     else:
7181       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7182       msg = result.fail_msg
7183       if msg:
7184         self.LogWarning("Could not shutdown instance: %s", msg)
7186 _ShutdownInstanceDisks(self, instance)
7189 class LUInstanceReinstall(LogicalUnit):
7190 """Reinstall an instance.
7193 HPATH = "instance-reinstall"
7194   HTYPE = constants.HTYPE_INSTANCE
7195   REQ_BGL = False
7197 def ExpandNames(self):
7198 self._ExpandAndLockInstance()
7200 def BuildHooksEnv(self):
7203 This runs on master, primary and secondary nodes of the instance.
7206 return _BuildInstanceHookEnvByObject(self, self.instance)
7208 def BuildHooksNodes(self):
7209 """Build hooks nodes.
7212     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7214     return (nl, nl)
7215 def CheckPrereq(self):
7216 """Check prerequisites.
7218 This checks that the instance is in the cluster and is not running.
7221 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7222 assert instance is not None, \
7223 "Cannot retrieve locked instance %s" % self.op.instance_name
7224 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7225 " offline, cannot reinstall")
7227 if instance.disk_template == constants.DT_DISKLESS:
7228 raise errors.OpPrereqError("Instance '%s' has no disks" %
7229                                  self.op.instance_name,
7230                                  errors.ECODE_INVAL)
7231 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7233 if self.op.os_type is not None:
7235 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7236 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7237       instance_os = self.op.os_type
7238     else:
7239       instance_os = instance.os
7241 nodelist = list(instance.all_nodes)
7243 if self.op.osparams:
7244 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7245 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7246       self.os_inst = i_osdict # the new dict (without defaults)
7247     else:
7248       self.os_inst = {}
7250 self.instance = instance
7252 def Exec(self, feedback_fn):
7253 """Reinstall the instance.
7256 inst = self.instance
7258 if self.op.os_type is not None:
7259 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7260 inst.os = self.op.os_type
7261 # Write to configuration
7262 self.cfg.Update(inst, feedback_fn)
7264     _StartInstanceDisks(self, inst, None)
7265     try:
7266       feedback_fn("Running the instance OS create scripts...")
7267 # FIXME: pass debug option from opcode to backend
7268 result = self.rpc.call_instance_os_add(inst.primary_node,
7269 (inst, self.os_inst), True,
7270 self.op.debug_level)
7271 result.Raise("Could not install OS for instance %s on node %s" %
7272 (inst.name, inst.primary_node))
7273     finally:
7274       _ShutdownInstanceDisks(self, inst)
7277 class LUInstanceRecreateDisks(LogicalUnit):
7278 """Recreate an instance's missing disks.
7281 HPATH = "instance-recreate-disks"
7282   HTYPE = constants.HTYPE_INSTANCE
7283   REQ_BGL = False
7285 _MODIFYABLE = frozenset([
7286 constants.IDISK_SIZE,
7287     constants.IDISK_MODE,
7288     ])
7290 # New or changed disk parameters may have different semantics
7291 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7292 constants.IDISK_ADOPT,
7294 # TODO: Implement support changing VG while recreating
7295     constants.IDISK_VG,
7296     constants.IDISK_METAVG,
7297     ]))
7299 def _RunAllocator(self):
7300 """Run the allocator based on input opcode.
7303 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7306 # The allocator should actually run in "relocate" mode, but current
7307 # allocators don't support relocating all the nodes of an instance at
7308 # the same time. As a workaround we use "allocate" mode, but this is
7309 # suboptimal for two reasons:
7310 # - The instance name passed to the allocator is present in the list of
7311 # existing instances, so there could be a conflict within the
7312 # internal structures of the allocator. This doesn't happen with the
7313 # current allocators, but it's a liability.
7314 # - The allocator counts the resources used by the instance twice: once
7315 # because the instance exists already, and once because it tries to
7316 # allocate a new instance.
7317 # The allocator could choose some of the nodes on which the instance is
7318 # running, but that's not a problem. If the instance nodes are broken,
7319 # they should be already be marked as drained or offline, and hence
7320 # skipped by the allocator. If instance disks have been lost for other
7321 # reasons, then recreating the disks on the same nodes should be fine.
7322 disk_template = self.instance.disk_template
7323 spindle_use = be_full[constants.BE_SPINDLE_USE]
7324 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7325 disk_template=disk_template,
7326 tags=list(self.instance.GetTags()),
7327 os=self.instance.os,
7328                                         nics=[{}],
7329                                         vcpus=be_full[constants.BE_VCPUS],
7330 memory=be_full[constants.BE_MAXMEM],
7331 spindle_use=spindle_use,
7332 disks=[{constants.IDISK_SIZE: d.size,
7333 constants.IDISK_MODE: d.mode}
7334 for d in self.instance.disks],
7335 hypervisor=self.instance.hypervisor)
7336 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7338 ial.Run(self.op.iallocator)
7340 assert req.RequiredNodes() == len(self.instance.all_nodes)
7342     if not ial.success:
7343       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7344 " %s" % (self.op.iallocator, ial.info),
7347 self.op.nodes = ial.result
7348 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7349 self.op.instance_name, self.op.iallocator,
7350 utils.CommaJoin(ial.result))
7352 def CheckArguments(self):
7353 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7354 # Normalize and convert deprecated list of disk indices
7355 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7357 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7358     if duplicates:
7359       raise errors.OpPrereqError("Some disks have been specified more than"
7360 " once: %s" % utils.CommaJoin(duplicates),
7363 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7364 # when neither iallocator nor nodes are specified
7365 if self.op.iallocator or self.op.nodes:
7366 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7368 for (idx, params) in self.op.disks:
7369 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7370       unsupported = frozenset(params.keys()) - self._MODIFYABLE
7371       if unsupported:
7372         raise errors.OpPrereqError("Parameters for disk %s try to change"
7373                                    " unmodifiable parameter(s): %s" %
7374                                    (idx, utils.CommaJoin(unsupported)),
7375                                    errors.ECODE_INVAL)
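  # Illustrative note (sketch, not part of the LU): the deprecated opcode form
  # passed bare disk indices, which CheckArguments above normalizes, e.g.
  #
  #   [2, 0] becomes [(0, {}), (2, {})]
  #
  # i.e. a sorted, de-duplicated list of (index, parameter-overrides) pairs,
  # where an empty dict means "recreate this disk unchanged".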
7377 def ExpandNames(self):
7378 self._ExpandAndLockInstance()
7379 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7380     if self.op.nodes:
7381       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7382 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7383     else:
7384       self.needed_locks[locking.LEVEL_NODE] = []
7385 if self.op.iallocator:
7386 # iallocator will select a new node in the same group
7387 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7388 self.needed_locks[locking.LEVEL_NODE_RES] = []
7390 def DeclareLocks(self, level):
7391 if level == locking.LEVEL_NODEGROUP:
7392 assert self.op.iallocator is not None
7393 assert not self.op.nodes
7394 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7395 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7396 # Lock the primary group used by the instance optimistically; this
7397 # requires going via the node before it's locked, requiring
7398 # verification later on
7399 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7400 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7402 elif level == locking.LEVEL_NODE:
7403 # If an allocator is used, then we lock all the nodes in the current
7404 # instance group, as we don't know yet which ones will be selected;
7405 # if we replace the nodes without using an allocator, locks are
7406 # already declared in ExpandNames; otherwise, we need to lock all the
7407 # instance nodes for disk re-creation
7408 if self.op.iallocator:
7409 assert not self.op.nodes
7410 assert not self.needed_locks[locking.LEVEL_NODE]
7411 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7413 # Lock member nodes of the group of the primary node
7414 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7415 self.needed_locks[locking.LEVEL_NODE].extend(
7416 self.cfg.GetNodeGroup(group_uuid).members)
7417 elif not self.op.nodes:
7418 self._LockInstancesNodes(primary_only=False)
7419 elif level == locking.LEVEL_NODE_RES:
7421 self.needed_locks[locking.LEVEL_NODE_RES] = \
7422 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7424 def BuildHooksEnv(self):
7427 This runs on master, primary and secondary nodes of the instance.
7430 return _BuildInstanceHookEnvByObject(self, self.instance)
7432 def BuildHooksNodes(self):
7433 """Build hooks nodes.
7436     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7438     return (nl, nl)
7439 def CheckPrereq(self):
7440 """Check prerequisites.
7442 This checks that the instance is in the cluster and is not running.
7445 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7446 assert instance is not None, \
7447 "Cannot retrieve locked instance %s" % self.op.instance_name
7448     if self.op.nodes:
7449       if len(self.op.nodes) != len(instance.all_nodes):
7450 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7451 " %d replacement nodes were specified" %
7452 (instance.name, len(instance.all_nodes),
7453                                     len(self.op.nodes)),
7454                                    errors.ECODE_INVAL)
7455 assert instance.disk_template != constants.DT_DRBD8 or \
7456 len(self.op.nodes) == 2
7457 assert instance.disk_template != constants.DT_PLAIN or \
7458 len(self.op.nodes) == 1
7459       primary_node = self.op.nodes[0]
7460     else:
7461 primary_node = instance.primary_node
7462 if not self.op.iallocator:
7463 _CheckNodeOnline(self, primary_node)
7465 if instance.disk_template == constants.DT_DISKLESS:
7466 raise errors.OpPrereqError("Instance '%s' has no disks" %
7467 self.op.instance_name, errors.ECODE_INVAL)
7469 # Verify if node group locks are still correct
7470 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7471 if owned_groups:
7472 # Node group locks are acquired only for the primary node (and only
7473 # when the allocator is used)
7474 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7475 primary_only=True)
7477 # if we replace nodes *and* the old primary is offline, we don't
7478 # check the instance state
7479 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7480 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7481 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7482 msg="cannot recreate disks")
7484 if self.op.disks:
7485 self.disks = dict(self.op.disks)
7486 else:
7487 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7489 maxidx = max(self.disks.keys())
7490 if maxidx >= len(instance.disks):
7491 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7492 errors.ECODE_PARAMS)
7494 if ((self.op.nodes or self.op.iallocator) and
7495 sorted(self.disks.keys()) != range(len(instance.disks))):
7496 raise errors.OpPrereqError("Can't recreate disks partially and"
7497 " change the nodes at the same time",
7500 self.instance = instance
7502 if self.op.iallocator:
7503 self._RunAllocator()
7504 # Release unneeded node and node resource locks
7505 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7506 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7508 def Exec(self, feedback_fn):
7509 """Recreate the disks.
7512 instance = self.instance
7514 assert (self.owned_locks(locking.LEVEL_NODE) ==
7515 self.owned_locks(locking.LEVEL_NODE_RES))
7517 to_skip = []
7518 mods = [] # keeps track of needed changes
7520 for idx, disk in enumerate(instance.disks):
7521 try:
7522 changes = self.disks[idx]
7523 except KeyError:
7524 # Disk should not be recreated
7525 to_skip.append(idx)
7526 continue
7528 # update secondaries for disks, if needed
7529 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7530 # need to update the nodes and minors
7531 assert len(self.op.nodes) == 2
7532 assert len(disk.logical_id) == 6 # otherwise disk internals
7534 (_, _, old_port, _, _, old_secret) = disk.logical_id
7535 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7536 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7537 new_minors[0], new_minors[1], old_secret)
7538 assert len(disk.logical_id) == len(new_id)
7539 else:
7540 new_id = None
7542 mods.append((idx, new_id, changes))
7544 # now that we have passed all asserts above, we can apply the mods
7545 # in a single run (to avoid partial changes)
7546 for idx, new_id, changes in mods:
7547 disk = instance.disks[idx]
7548 if new_id is not None:
7549 assert disk.dev_type == constants.LD_DRBD8
7550 disk.logical_id = new_id
7551 if changes:
7552 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7553 mode=changes.get(constants.IDISK_MODE, None))
7555 # change primary node, if needed
7556 if self.op.nodes:
7557 instance.primary_node = self.op.nodes[0]
7558 self.LogWarning("Changing the instance's nodes, you will have to"
7559 " remove any disks left on the older nodes manually")
7562 self.cfg.Update(instance, feedback_fn)
7564 # All touched nodes must be locked
7565 mylocks = self.owned_locks(locking.LEVEL_NODE)
7566 assert mylocks.issuperset(frozenset(instance.all_nodes))
7567 _CreateDisks(self, instance, to_skip=to_skip)
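# Illustrative sketch (not part of Ganeti's API): Exec above uses a
# validate-then-apply pattern. Every per-disk check runs first and only
# records the planned change in "mods"; the configuration is then mutated
# in a single pass, so a failed check cannot leave the disks half-updated:
#
#   planned = [check(disk) for disk in disks]  # may raise; nothing changed
#   for change in planned:
#     apply(change)                            # all checks passed, mutate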
7570 class LUInstanceRename(LogicalUnit):
7571 """Rename an instance.
7574 HPATH = "instance-rename"
7575 HTYPE = constants.HTYPE_INSTANCE
7577 def CheckArguments(self):
7581 if self.op.ip_check and not self.op.name_check:
7582 # TODO: make the ip check more flexible and not depend on the name check
7583 raise errors.OpPrereqError("IP address check requires a name check",
7586 def BuildHooksEnv(self):
7587 """Build hooks env.
7589 This runs on master, primary and secondary nodes of the instance.
7591 """
7592 env = _BuildInstanceHookEnvByObject(self, self.instance)
7593 env["INSTANCE_NEW_NAME"] = self.op.new_name
7594 return env
7596 def BuildHooksNodes(self):
7597 """Build hooks nodes.
7600 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7603 def CheckPrereq(self):
7604 """Check prerequisites.
7606 This checks that the instance is in the cluster and is not running.
7609 self.op.instance_name = _ExpandInstanceName(self.cfg,
7610 self.op.instance_name)
7611 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7612 assert instance is not None
7613 _CheckNodeOnline(self, instance.primary_node)
7614 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7615 msg="cannot rename")
7616 self.instance = instance
7618 new_name = self.op.new_name
7619 if self.op.name_check:
7620 hostname = _CheckHostnameSane(self, new_name)
7621 new_name = self.op.new_name = hostname.name
7622 if (self.op.ip_check and
7623 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7624 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7625 (hostname.ip, new_name),
7626 errors.ECODE_NOTUNIQUE)
7628 instance_list = self.cfg.GetInstanceList()
7629 if new_name in instance_list and new_name != instance.name:
7630 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7631 new_name, errors.ECODE_EXISTS)
7633 def Exec(self, feedback_fn):
7634 """Rename the instance.
7637 inst = self.instance
7638 old_name = inst.name
7640 rename_file_storage = False
7641 if (inst.disk_template in constants.DTS_FILEBASED and
7642 self.op.new_name != inst.name):
7643 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7644 rename_file_storage = True
7646 self.cfg.RenameInstance(inst.name, self.op.new_name)
7647 # Change the instance lock. This is definitely safe while we hold the BGL.
7648 # Otherwise the new lock would have to be added in acquired mode.
7650 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7651 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7652 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7654 # re-read the instance from the configuration after rename
7655 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7657 if rename_file_storage:
7658 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7659 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7660 old_file_storage_dir,
7661 new_file_storage_dir)
7662 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7663 " (but the instance has been renamed in Ganeti)" %
7664 (inst.primary_node, old_file_storage_dir,
7665 new_file_storage_dir))
7666 try:
7667 _StartInstanceDisks(self, inst, None)
7668 # update info on disks
7669 info = _GetInstanceInfoText(inst)
7670 for (idx, disk) in enumerate(inst.disks):
7671 for node in inst.all_nodes:
7672 self.cfg.SetDiskID(disk, node)
7673 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7675 self.LogWarning("Error setting info on node %s for disk %s: %s",
7676 node, idx, result.fail_msg)
7678 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7679 old_name, self.op.debug_level)
7680 msg = result.fail_msg
7682 msg = ("Could not run OS rename script for instance %s on node %s"
7683 " (but the instance has been renamed in Ganeti): %s" %
7684 (inst.name, inst.primary_node, msg))
7685 self.LogWarning(msg)
7686 finally:
7687 _ShutdownInstanceDisks(self, inst)
7689 return inst.name
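# Illustrative note (not part of Ganeti's API): the try/finally bracket in
# Exec above guarantees that the disks activated for the OS rename script
# are shut down again even if the script fails:
#
#   try:
#     _StartInstanceDisks(self, inst, None)
#     ... update disk info, run the OS rename script ...
#   finally:
#     _ShutdownInstanceDisks(self, inst)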
7692 class LUInstanceRemove(LogicalUnit):
7693 """Remove an instance.
7696 HPATH = "instance-remove"
7697 HTYPE = constants.HTYPE_INSTANCE
7700 def ExpandNames(self):
7701 self._ExpandAndLockInstance()
7702 self.needed_locks[locking.LEVEL_NODE] = []
7703 self.needed_locks[locking.LEVEL_NODE_RES] = []
7704 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7706 def DeclareLocks(self, level):
7707 if level == locking.LEVEL_NODE:
7708 self._LockInstancesNodes()
7709 elif level == locking.LEVEL_NODE_RES:
7711 self.needed_locks[locking.LEVEL_NODE_RES] = \
7712 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7714 def BuildHooksEnv(self):
7715 """Build hooks env.
7717 This runs on master, primary and secondary nodes of the instance.
7719 """
7720 env = _BuildInstanceHookEnvByObject(self, self.instance)
7721 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7722 return env
7724 def BuildHooksNodes(self):
7725 """Build hooks nodes.
7728 nl = [self.cfg.GetMasterNode()]
7729 nl_post = list(self.instance.all_nodes) + nl
7730 return (nl, nl_post)
7732 def CheckPrereq(self):
7733 """Check prerequisites.
7735 This checks that the instance is in the cluster.
7738 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7739 assert self.instance is not None, \
7740 "Cannot retrieve locked instance %s" % self.op.instance_name
7742 def Exec(self, feedback_fn):
7743 """Remove the instance.
7746 instance = self.instance
7747 logging.info("Shutting down instance %s on node %s",
7748 instance.name, instance.primary_node)
7750 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7751 self.op.shutdown_timeout)
7752 msg = result.fail_msg
7753 if msg:
7754 if self.op.ignore_failures:
7755 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7756 else:
7757 raise errors.OpExecError("Could not shutdown instance %s on"
7758 " node %s: %s" %
7759 (instance.name, instance.primary_node, msg))
7761 assert (self.owned_locks(locking.LEVEL_NODE) ==
7762 self.owned_locks(locking.LEVEL_NODE_RES))
7763 assert not (set(instance.all_nodes) -
7764 self.owned_locks(locking.LEVEL_NODE)), \
7765 "Not owning correct locks"
7767 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7770 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7771 """Utility function to remove an instance.
7774 logging.info("Removing block devices for instance %s", instance.name)
7776 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7777 if not ignore_failures:
7778 raise errors.OpExecError("Can't remove instance's disks")
7779 feedback_fn("Warning: can't remove instance's disks")
7781 logging.info("Removing instance %s out of cluster config", instance.name)
7783 lu.cfg.RemoveInstance(instance.name)
7785 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7786 "Instance lock removal conflict"
7788 # Remove lock for the instance
7789 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7792 class LUInstanceQuery(NoHooksLU):
7793 """Logical unit for querying instances.
7796 # pylint: disable=W0142
7799 def CheckArguments(self):
7800 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7801 self.op.output_fields, self.op.use_locking)
7803 def ExpandNames(self):
7804 self.iq.ExpandNames(self)
7806 def DeclareLocks(self, level):
7807 self.iq.DeclareLocks(self, level)
7809 def Exec(self, feedback_fn):
7810 return self.iq.OldStyleQuery(self)
7813 def _ExpandNamesForMigration(lu):
7814 """Expands names for use with L{TLMigrateInstance}.
7816 @type lu: L{LogicalUnit}
7819 if lu.op.target_node is not None:
7820 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
7822 lu.needed_locks[locking.LEVEL_NODE] = []
7823 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7825 lu.needed_locks[locking.LEVEL_NODE_RES] = []
7826 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7828 # The node allocation lock is actually only needed for replicated instances
7829 # (e.g. DRBD8) and if an iallocator is used.
7830 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
7833 def _DeclareLocksForMigration(lu, level):
7834 """Declares locks for L{TLMigrateInstance}.
7836 @type lu: L{LogicalUnit}
7837 @param level: Lock level
7840 if level == locking.LEVEL_NODE_ALLOC:
7841 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
7843 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
7845 if instance.disk_template in constants.DTS_EXT_MIRROR:
7846 if lu.op.target_node is None:
7847 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7848 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7849 else:
7850 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7851 lu.op.target_node]
7852 del lu.recalculate_locks[locking.LEVEL_NODE]
7853 else:
7854 lu._LockInstancesNodes() # pylint: disable=W0212
7856 elif level == locking.LEVEL_NODE:
7857 # Node locks are declared together with the node allocation lock
7858 assert lu.needed_locks[locking.LEVEL_NODE]
7860 elif level == locking.LEVEL_NODE_RES:
7862 lu.needed_locks[locking.LEVEL_NODE_RES] = \
7863 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
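# A minimal sketch (not part of Ganeti; the function and its return value
# are hypothetical) summarizing the lock-set decision implemented by
# _DeclareLocksForMigration above.
def _ExampleMigrationNodeLocks(ext_mirror, target_node, instance_nodes):
  """Return the node locks a migration needs, per the rules above.

  @param ext_mirror: whether the disk template is externally mirrored
  @param target_node: the explicitly requested target node, if any
  @param instance_nodes: the instance's current nodes, primary first

  """
  if ext_mirror:
    if target_node is None:
      # an iallocator may pick any node, so everything must be locked
      return locking.ALL_SET
    return [instance_nodes[0], target_node]
  # internally mirrored (e.g. DRBD8): only the instance's own nodes matter
  return list(instance_nodes)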
7866 class LUInstanceFailover(LogicalUnit):
7867 """Failover an instance.
7870 HPATH = "instance-failover"
7871 HTYPE = constants.HTYPE_INSTANCE
7874 def CheckArguments(self):
7875 """Check the arguments.
7878 self.iallocator = getattr(self.op, "iallocator", None)
7879 self.target_node = getattr(self.op, "target_node", None)
7881 def ExpandNames(self):
7882 self._ExpandAndLockInstance()
7883 _ExpandNamesForMigration(self)
7885 self._migrater = \
7886 TLMigrateInstance(self, self.op.instance_name, False, True, False,
7887 self.op.ignore_consistency, True,
7888 self.op.shutdown_timeout, self.op.ignore_ipolicy)
7890 self.tasklets = [self._migrater]
7892 def DeclareLocks(self, level):
7893 _DeclareLocksForMigration(self, level)
7895 def BuildHooksEnv(self):
7896 """Build hooks env.
7898 This runs on master, primary and secondary nodes of the instance.
7900 """
7901 instance = self._migrater.instance
7902 source_node = instance.primary_node
7903 target_node = self.op.target_node
7904 env = {
7905 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7906 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7907 "OLD_PRIMARY": source_node,
7908 "NEW_PRIMARY": target_node,
7909 }
7911 if instance.disk_template in constants.DTS_INT_MIRROR:
7912 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7913 env["NEW_SECONDARY"] = source_node
7914 else:
7915 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7917 env.update(_BuildInstanceHookEnvByObject(self, instance))
7919 return env
7921 def BuildHooksNodes(self):
7922 """Build hooks nodes.
7925 instance = self._migrater.instance
7926 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7927 return (nl, nl + [instance.primary_node])
7930 class LUInstanceMigrate(LogicalUnit):
7931 """Migrate an instance.
7933 This is migration without shutting down, compared to the failover,
7934 which is done with shutdown.
7937 HPATH = "instance-migrate"
7938 HTYPE = constants.HTYPE_INSTANCE
7941 def ExpandNames(self):
7942 self._ExpandAndLockInstance()
7943 _ExpandNamesForMigration(self)
7945 self._migrater = \
7946 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
7947 False, self.op.allow_failover, False,
7948 self.op.allow_runtime_changes,
7949 constants.DEFAULT_SHUTDOWN_TIMEOUT,
7950 self.op.ignore_ipolicy)
7952 self.tasklets = [self._migrater]
7954 def DeclareLocks(self, level):
7955 _DeclareLocksForMigration(self, level)
7957 def BuildHooksEnv(self):
7958 """Build hooks env.
7960 This runs on master, primary and secondary nodes of the instance.
7962 """
7963 instance = self._migrater.instance
7964 source_node = instance.primary_node
7965 target_node = self.op.target_node
7966 env = _BuildInstanceHookEnvByObject(self, instance)
7967 env.update({
7968 "MIGRATE_LIVE": self._migrater.live,
7969 "MIGRATE_CLEANUP": self.op.cleanup,
7970 "OLD_PRIMARY": source_node,
7971 "NEW_PRIMARY": target_node,
7972 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7973 })
7975 if instance.disk_template in constants.DTS_INT_MIRROR:
7976 env["OLD_SECONDARY"] = target_node
7977 env["NEW_SECONDARY"] = source_node
7978 else:
7979 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7981 return env
7983 def BuildHooksNodes(self):
7984 """Build hooks nodes.
7987 instance = self._migrater.instance
7988 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7989 return (nl, nl + [instance.primary_node])
7992 class LUInstanceMove(LogicalUnit):
7993 """Move an instance by data-copying.
7996 HPATH = "instance-move"
7997 HTYPE = constants.HTYPE_INSTANCE
8000 def ExpandNames(self):
8001 self._ExpandAndLockInstance()
8002 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8003 self.op.target_node = target_node
8004 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8005 self.needed_locks[locking.LEVEL_NODE_RES] = []
8006 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8008 def DeclareLocks(self, level):
8009 if level == locking.LEVEL_NODE:
8010 self._LockInstancesNodes(primary_only=True)
8011 elif level == locking.LEVEL_NODE_RES:
8013 self.needed_locks[locking.LEVEL_NODE_RES] = \
8014 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8016 def BuildHooksEnv(self):
8017 """Build hooks env.
8019 This runs on master, primary and secondary nodes of the instance.
8021 """
8022 env = {
8023 "TARGET_NODE": self.op.target_node,
8024 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8025 }
8026 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8027 return env
8029 def BuildHooksNodes(self):
8030 """Build hooks nodes.
8034 self.cfg.GetMasterNode(),
8035 self.instance.primary_node,
8036 self.op.target_node,
8040 def CheckPrereq(self):
8041 """Check prerequisites.
8043 This checks that the instance is in the cluster.
8046 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8047 assert self.instance is not None, \
8048 "Cannot retrieve locked instance %s" % self.op.instance_name
8050 node = self.cfg.GetNodeInfo(self.op.target_node)
8051 assert node is not None, \
8052 "Cannot retrieve locked node %s" % self.op.target_node
8054 self.target_node = target_node = node.name
8056 if target_node == instance.primary_node:
8057 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8058 (instance.name, target_node),
8059 errors.ECODE_STATE)
8061 bep = self.cfg.GetClusterInfo().FillBE(instance)
8063 for idx, dsk in enumerate(instance.disks):
8064 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8065 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8066 " cannot copy" % idx, errors.ECODE_STATE)
8068 _CheckNodeOnline(self, target_node)
8069 _CheckNodeNotDrained(self, target_node)
8070 _CheckNodeVmCapable(self, target_node)
8071 cluster = self.cfg.GetClusterInfo()
8072 group_info = self.cfg.GetNodeGroup(node.group)
8073 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8074 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8075 ignore=self.op.ignore_ipolicy)
8077 if instance.admin_state == constants.ADMINST_UP:
8078 # check memory requirements on the secondary node
8079 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8080 instance.name, bep[constants.BE_MAXMEM],
8081 instance.hypervisor)
8083 self.LogInfo("Not checking memory on the secondary node as"
8084 " instance will not be started")
8086 # check bridge existence
8087 _CheckInstanceBridgesExist(self, instance, node=target_node)
8089 def Exec(self, feedback_fn):
8090 """Move an instance.
8092 The move is done by shutting it down on its present node, copying
8093 the data over (slow) and starting it on the new node.
8096 instance = self.instance
8098 source_node = instance.primary_node
8099 target_node = self.target_node
8101 self.LogInfo("Shutting down instance %s on source node %s",
8102 instance.name, source_node)
8104 assert (self.owned_locks(locking.LEVEL_NODE) ==
8105 self.owned_locks(locking.LEVEL_NODE_RES))
8107 result = self.rpc.call_instance_shutdown(source_node, instance,
8108 self.op.shutdown_timeout)
8109 msg = result.fail_msg
8110 if msg:
8111 if self.op.ignore_consistency:
8112 self.LogWarning("Could not shutdown instance %s on node %s."
8113 " Proceeding anyway. Please make sure node"
8114 " %s is down. Error details: %s",
8115 instance.name, source_node, source_node, msg)
8117 raise errors.OpExecError("Could not shutdown instance %s on"
8119 (instance.name, source_node, msg))
8121 # create the target disks
8122 try:
8123 _CreateDisks(self, instance, target_node=target_node)
8124 except errors.OpExecError:
8125 self.LogWarning("Device creation failed, reverting...")
8126 try:
8127 _RemoveDisks(self, instance, target_node=target_node)
8128 finally:
8129 self.cfg.ReleaseDRBDMinors(instance.name)
8130 raise
8132 cluster_name = self.cfg.GetClusterInfo().cluster_name
8134 errs = []
8135 # activate, get path, copy the data over
8136 for idx, disk in enumerate(instance.disks):
8137 self.LogInfo("Copying data for disk %d", idx)
8138 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8139 instance.name, True, idx)
8141 self.LogWarning("Can't assemble newly created disk %d: %s",
8142 idx, result.fail_msg)
8143 errs.append(result.fail_msg)
8145 dev_path = result.payload
8146 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8147 target_node, dev_path,
8148 cluster_name)
8149 if result.fail_msg:
8150 self.LogWarning("Can't copy data over for disk %d: %s",
8151 idx, result.fail_msg)
8152 errs.append(result.fail_msg)
8153 break
8155 if errs:
8156 self.LogWarning("Some disks failed to copy, aborting")
8157 try:
8158 _RemoveDisks(self, instance, target_node=target_node)
8159 finally:
8160 self.cfg.ReleaseDRBDMinors(instance.name)
8161 raise errors.OpExecError("Errors during disk copy: %s" %
8162 (",".join(errs),))
8164 instance.primary_node = target_node
8165 self.cfg.Update(instance, feedback_fn)
8167 self.LogInfo("Removing the disks on the original node")
8168 _RemoveDisks(self, instance, target_node=source_node)
8170 # Only start the instance if it's marked as up
8171 if instance.admin_state == constants.ADMINST_UP:
8172 self.LogInfo("Starting instance %s on node %s",
8173 instance.name, target_node)
8175 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8176 ignore_secondaries=True)
8177 if not disks_ok:
8178 _ShutdownInstanceDisks(self, instance)
8179 raise errors.OpExecError("Can't activate the instance's disks")
8181 result = self.rpc.call_instance_start(target_node,
8182 (instance, None, None), False)
8183 msg = result.fail_msg
8184 if msg:
8185 _ShutdownInstanceDisks(self, instance)
8186 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8187 (instance.name, target_node, msg))
8190 class LUNodeMigrate(LogicalUnit):
8191 """Migrate all instances from a node.
8194 HPATH = "node-migrate"
8195 HTYPE = constants.HTYPE_NODE
8198 def CheckArguments(self):
8199 pass
8201 def ExpandNames(self):
8202 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8204 self.share_locks = _ShareAll()
8205 self.needed_locks = {
8206 locking.LEVEL_NODE: [self.op.node_name],
8207 }
8209 def BuildHooksEnv(self):
8210 """Build hooks env.
8212 This runs on the master, the primary and all the secondaries.
8214 """
8215 return {
8216 "NODE_NAME": self.op.node_name,
8217 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8218 }
8220 def BuildHooksNodes(self):
8221 """Build hooks nodes.
8224 nl = [self.cfg.GetMasterNode()]
8227 def CheckPrereq(self):
8228 pass
8230 def Exec(self, feedback_fn):
8231 # Prepare jobs for migration instances
8232 allow_runtime_changes = self.op.allow_runtime_changes
8233 jobs = [
8234 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8235 mode=self.op.mode,
8236 live=self.op.live,
8237 iallocator=self.op.iallocator,
8238 target_node=self.op.target_node,
8239 allow_runtime_changes=allow_runtime_changes,
8240 ignore_ipolicy=self.op.ignore_ipolicy)]
8241 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8243 # TODO: Run iallocator in this opcode and pass correct placement options to
8244 # OpInstanceMigrate. Since other jobs can modify the cluster between
8245 # running the iallocator and the actual migration, a good consistency model
8246 # will have to be found.
8248 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8249 frozenset([self.op.node_name]))
8251 return ResultWithJobs(jobs)
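# A minimal sketch (not part of Ganeti; the helper is hypothetical) of the
# job list shape handed to ResultWithJobs above: one single-opcode job per
# primary instance, so each migration runs and fails independently.
def _ExampleJobsPerInstance(instance_names):
  """Build a one-opcode-per-instance job list."""
  return [[opcodes.OpInstanceMigrate(instance_name=name)]
          for name in instance_names]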
8254 class TLMigrateInstance(Tasklet):
8255 """Tasklet class for instance migration.
8258 @ivar live: whether the migration will be done live or non-live;
8259 this variable is initialized only after CheckPrereq has run
8260 @type cleanup: boolean
8261 @ivar cleanup: Whether we clean up after a failed migration
8262 @type iallocator: string
8263 @ivar iallocator: The iallocator used to determine target_node
8264 @type target_node: string
8265 @ivar target_node: If given, the target_node to reallocate the instance to
8266 @type failover: boolean
8267 @ivar failover: Whether operation results in failover or migration
8268 @type fallback: boolean
8269 @ivar fallback: Whether fallback to failover is allowed if migration is not
8270 possible
8271 @type ignore_consistency: boolean
8272 @ivar ignore_consistency: Whether we should ignore consistency between the
8273 source and the target node
8274 @type shutdown_timeout: int
8275 @ivar shutdown_timeout: the shutdown timeout to use in case of failover
8276 @type ignore_ipolicy: bool
8277 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8282 _MIGRATION_POLL_INTERVAL = 1 # seconds
8283 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8285 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8286 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8287 ignore_ipolicy):
8288 """Initializes this class.
8290 """
8291 Tasklet.__init__(self, lu)
8294 self.instance_name = instance_name
8295 self.cleanup = cleanup
8296 self.live = False # will be overridden later
8297 self.failover = failover
8298 self.fallback = fallback
8299 self.ignore_consistency = ignore_consistency
8300 self.shutdown_timeout = shutdown_timeout
8301 self.ignore_ipolicy = ignore_ipolicy
8302 self.allow_runtime_changes = allow_runtime_changes
8304 def CheckPrereq(self):
8305 """Check prerequisites.
8307 This checks that the instance is in the cluster.
8310 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8311 instance = self.cfg.GetInstanceInfo(instance_name)
8312 assert instance is not None
8313 self.instance = instance
8314 cluster = self.cfg.GetClusterInfo()
8316 if (not self.cleanup and
8317 not instance.admin_state == constants.ADMINST_UP and
8318 not self.failover and self.fallback):
8319 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8320 " switching to failover")
8321 self.failover = True
8323 if instance.disk_template not in constants.DTS_MIRRORED:
8328 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8329 " %s" % (instance.disk_template, text),
8332 if instance.disk_template in constants.DTS_EXT_MIRROR:
8333 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8335 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8337 if self.lu.op.iallocator:
8338 self._RunAllocator()
8339 else:
8340 # We set self.target_node as it is required further below
8342 self.target_node = self.lu.op.target_node
8344 # Check that the target node is correct in terms of instance policy
8345 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8346 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8347 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8348 group_info)
8349 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8350 ignore=self.ignore_ipolicy)
8352 # self.target_node is already populated, either directly or by the
8353 # iallocator run
8354 target_node = self.target_node
8355 if self.target_node == instance.primary_node:
8356 raise errors.OpPrereqError("Cannot migrate instance %s"
8357 " to its primary (%s)" %
8358 (instance.name, instance.primary_node),
8359 errors.ECODE_STATE)
8361 if len(self.lu.tasklets) == 1:
8362 # It is safe to release locks only when we're the only tasklet
8363 # in the LU
8364 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8365 keep=[instance.primary_node, self.target_node])
8366 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8368 else:
8369 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8371 secondary_nodes = instance.secondary_nodes
8372 if not secondary_nodes:
8373 raise errors.ConfigurationError("No secondary node but using"
8374 " %s disk template" %
8375 instance.disk_template)
8376 target_node = secondary_nodes[0]
8377 if self.lu.op.iallocator or (self.lu.op.target_node and
8378 self.lu.op.target_node != target_node):
8380 text = "failed over"
8383 raise errors.OpPrereqError("Instances with disk template %s cannot"
8384 " be %s to arbitrary nodes"
8385 " (neither an iallocator nor a target"
8386 " node can be passed)" %
8387 (instance.disk_template, text),
8388 errors.ECODE_INVAL)
8389 nodeinfo = self.cfg.GetNodeInfo(target_node)
8390 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8391 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8392 group_info)
8393 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8394 ignore=self.ignore_ipolicy)
8396 i_be = cluster.FillBE(instance)
8398 # check memory requirements on the secondary node
8399 if (not self.cleanup and
8400 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8401 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8402 "migrating instance %s" %
8403 instance.name,
8404 i_be[constants.BE_MINMEM],
8405 instance.hypervisor)
8407 self.lu.LogInfo("Not checking memory on the secondary node as"
8408 " instance will not be started")
8410 # check if failover must be forced instead of migration
8411 if (not self.cleanup and not self.failover and
8412 i_be[constants.BE_ALWAYS_FAILOVER]):
8413 self.lu.LogInfo("Instance configured to always failover; fallback"
8415 self.failover = True
8417 # check bridge existence
8418 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8420 if not self.cleanup:
8421 _CheckNodeNotDrained(self.lu, target_node)
8422 if not self.failover:
8423 result = self.rpc.call_instance_migratable(instance.primary_node,
8424 instance)
8425 if result.fail_msg and self.fallback:
8426 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8427 " failover")
8428 self.failover = True
8429 else:
8430 result.Raise("Can't migrate, please use failover",
8431 prereq=True, ecode=errors.ECODE_STATE)
8433 assert not (self.failover and self.cleanup)
8435 if not self.failover:
8436 if self.lu.op.live is not None and self.lu.op.mode is not None:
8437 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8438 " parameters are accepted",
8440 if self.lu.op.live is not None:
8442 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8444 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8445 # reset the 'live' parameter to None so that repeated
8446 # invocations of CheckPrereq do not raise an exception
8447 self.lu.op.live = None
8448 elif self.lu.op.mode is None:
8449 # read the default value from the hypervisor
8450 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8451 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8453 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8454 else:
8455 # Failover is never live
8456 self.live = False
8458 if not (self.failover or self.cleanup):
8459 remote_info = self.rpc.call_instance_info(instance.primary_node,
8460 instance.name,
8461 instance.hypervisor)
8462 remote_info.Raise("Error checking instance on node %s" %
8463 instance.primary_node)
8464 instance_running = bool(remote_info.payload)
8465 if instance_running:
8466 self.current_mem = int(remote_info.payload["memory"])
8468 def _RunAllocator(self):
8469 """Run the allocator based on input opcode.
8472 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8474 # FIXME: add a self.ignore_ipolicy option
8475 req = iallocator.IAReqRelocate(name=self.instance_name,
8476 relocate_from=[self.instance.primary_node])
8477 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8479 ial.Run(self.lu.op.iallocator)
8482 raise errors.OpPrereqError("Can't compute nodes using"
8483 " iallocator '%s': %s" %
8484 (self.lu.op.iallocator, ial.info),
8486 self.target_node = ial.result[0]
8487 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8488 self.instance_name, self.lu.op.iallocator,
8489 utils.CommaJoin(ial.result))
8491 def _WaitUntilSync(self):
8492 """Poll with custom rpc for disk sync.
8494 This uses our own step-based rpc call.
8497 self.feedback_fn("* wait until resync is done")
8501 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8503 (self.instance.disks,
8506 for node, nres in result.items():
8507 nres.Raise("Cannot resync disks on node %s" % node)
8508 node_done, node_percent = nres.payload
8509 all_done = all_done and node_done
8510 if node_percent is not None:
8511 min_percent = min(min_percent, node_percent)
8513 if min_percent < 100:
8514 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8517 def _EnsureSecondary(self, node):
8518 """Demote a node to secondary.
8521 self.feedback_fn("* switching node %s to secondary mode" % node)
8523 for dev in self.instance.disks:
8524 self.cfg.SetDiskID(dev, node)
8526 result = self.rpc.call_blockdev_close(node, self.instance.name,
8527 self.instance.disks)
8528 result.Raise("Cannot change disk to secondary on node %s" % node)
8530 def _GoStandalone(self):
8531 """Disconnect from the network.
8534 self.feedback_fn("* changing into standalone mode")
8535 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8536 self.instance.disks)
8537 for node, nres in result.items():
8538 nres.Raise("Cannot disconnect disks node %s" % node)
8540 def _GoReconnect(self, multimaster):
8541 """Reconnect to the network.
8547 msg = "single-master"
8548 self.feedback_fn("* changing disks into %s mode" % msg)
8549 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8550 (self.instance.disks, self.instance),
8551 self.instance.name, multimaster)
8552 for node, nres in result.items():
8553 nres.Raise("Cannot change disks config on node %s" % node)
8555 def _ExecCleanup(self):
8556 """Try to cleanup after a failed migration.
8558 The cleanup is done by:
8559 - check that the instance is running only on one node
8560 (and update the config if needed)
8561 - change disks on its secondary node to secondary
8562 - wait until disks are fully synchronized
8563 - disconnect from the network
8564 - change disks into single-master mode
8565 - wait again until disks are fully synchronized
8568 instance = self.instance
8569 target_node = self.target_node
8570 source_node = self.source_node
8572 # check running on only one node
8573 self.feedback_fn("* checking where the instance actually runs"
8574 " (if this hangs, the hypervisor might be in"
8576 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8577 for node, result in ins_l.items():
8578 result.Raise("Can't contact node %s" % node)
8580 runningon_source = instance.name in ins_l[source_node].payload
8581 runningon_target = instance.name in ins_l[target_node].payload
8583 if runningon_source and runningon_target:
8584 raise errors.OpExecError("Instance seems to be running on two nodes,"
8585 " or the hypervisor is confused; you will have"
8586 " to ensure manually that it runs only on one"
8587 " and restart this operation")
8589 if not (runningon_source or runningon_target):
8590 raise errors.OpExecError("Instance does not seem to be running at all;"
8591 " in this case it's safer to repair by"
8592 " running 'gnt-instance stop' to ensure disk"
8593 " shutdown, and then restarting it")
8595 if runningon_target:
8596 # the migration has actually succeeded, we need to update the config
8597 self.feedback_fn("* instance running on secondary node (%s),"
8598 " updating config" % target_node)
8599 instance.primary_node = target_node
8600 self.cfg.Update(instance, self.feedback_fn)
8601 demoted_node = source_node
8603 self.feedback_fn("* instance confirmed to be running on its"
8604 " primary node (%s)" % source_node)
8605 demoted_node = target_node
8607 if instance.disk_template in constants.DTS_INT_MIRROR:
8608 self._EnsureSecondary(demoted_node)
8609 try:
8610 self._WaitUntilSync()
8611 except errors.OpExecError:
8612 # we ignore errors here, since if the device is standalone, it
8613 # won't be able to sync
8614 pass
8615 self._GoStandalone()
8616 self._GoReconnect(False)
8617 self._WaitUntilSync()
8619 self.feedback_fn("* done")
8621 def _RevertDiskStatus(self):
8622 """Try to revert the disk status after a failed migration.
8625 target_node = self.target_node
8626 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8627 return
8629 try:
8630 self._EnsureSecondary(target_node)
8631 self._GoStandalone()
8632 self._GoReconnect(False)
8633 self._WaitUntilSync()
8634 except errors.OpExecError, err:
8635 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8636 " please try to recover the instance manually;"
8637 " error '%s'" % str(err))
8639 def _AbortMigration(self):
8640 """Call the hypervisor code to abort a started migration.
8643 instance = self.instance
8644 target_node = self.target_node
8645 source_node = self.source_node
8646 migration_info = self.migration_info
8648 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8649 instance,
8650 migration_info,
8651 False)
8652 abort_msg = abort_result.fail_msg
8653 if abort_msg:
8654 logging.error("Aborting migration failed on target node %s: %s",
8655 target_node, abort_msg)
8656 # Don't raise an exception here, as we still have to try to revert the
8657 # disk status, even if this step failed.
8659 abort_result = self.rpc.call_instance_finalize_migration_src(
8660 source_node, instance, False, self.live)
8661 abort_msg = abort_result.fail_msg
8663 logging.error("Aborting migration failed on source node %s: %s",
8664 source_node, abort_msg)
8666 def _ExecMigration(self):
8667 """Migrate an instance.
8669 The migrate is done by:
8670 - change the disks into dual-master mode
8671 - wait until disks are fully synchronized again
8672 - migrate the instance
8673 - change disks on the new secondary node (the old primary) to secondary
8674 - wait until disks are fully synchronized
8675 - change disks into single-master mode
8678 instance = self.instance
8679 target_node = self.target_node
8680 source_node = self.source_node
8682 # Check for hypervisor version mismatch and warn the user.
8683 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8684 None, [self.instance.hypervisor])
8685 for ninfo in nodeinfo.values():
8686 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8687 ninfo.node)
8688 (_, _, (src_info, )) = nodeinfo[source_node].payload
8689 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8691 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8692 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8693 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8694 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8695 if src_version != dst_version:
8696 self.feedback_fn("* warning: hypervisor version mismatch between"
8697 " source (%s) and target (%s) node" %
8698 (src_version, dst_version))
8700 self.feedback_fn("* checking disk consistency between source and target")
8701 for (idx, dev) in enumerate(instance.disks):
8702 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8703 raise errors.OpExecError("Disk %s is degraded or not fully"
8704 " synchronized on target node,"
8705 " aborting migration" % idx)
8707 if self.current_mem > self.tgt_free_mem:
8708 if not self.allow_runtime_changes:
8709 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8710 " free memory to fit instance %s on target"
8711 " node %s (have %dMB, need %dMB)" %
8712 (instance.name, target_node,
8713 self.tgt_free_mem, self.current_mem))
8714 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8715 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8716 instance,
8717 self.tgt_free_mem)
8718 rpcres.Raise("Cannot modify instance runtime memory")
8720 # First get the migration information from the remote node
8721 result = self.rpc.call_migration_info(source_node, instance)
8722 msg = result.fail_msg
8724 log_err = ("Failed fetching source migration information from %s: %s" %
8726 logging.error(log_err)
8727 raise errors.OpExecError(log_err)
8729 self.migration_info = migration_info = result.payload
8731 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8732 # Then switch the disks to master/master mode
8733 self._EnsureSecondary(target_node)
8734 self._GoStandalone()
8735 self._GoReconnect(True)
8736 self._WaitUntilSync()
8738 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8739 result = self.rpc.call_accept_instance(target_node,
8740 instance,
8741 migration_info,
8742 self.nodes_ip[target_node])
8744 msg = result.fail_msg
8745 if msg:
8746 logging.error("Instance pre-migration failed, trying to revert"
8747 " disk status: %s", msg)
8748 self.feedback_fn("Pre-migration failed, aborting")
8749 self._AbortMigration()
8750 self._RevertDiskStatus()
8751 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8752 (instance.name, msg))
8754 self.feedback_fn("* migrating instance to %s" % target_node)
8755 result = self.rpc.call_instance_migrate(source_node, instance,
8756 self.nodes_ip[target_node],
8757 self.live)
8758 msg = result.fail_msg
8759 if msg:
8760 logging.error("Instance migration failed, trying to revert"
8761 " disk status: %s", msg)
8762 self.feedback_fn("Migration failed, aborting")
8763 self._AbortMigration()
8764 self._RevertDiskStatus()
8765 raise errors.OpExecError("Could not migrate instance %s: %s" %
8766 (instance.name, msg))
8768 self.feedback_fn("* starting memory transfer")
8769 last_feedback = time.time()
8770 while True:
8771 result = self.rpc.call_instance_get_migration_status(source_node,
8772 instance)
8773 msg = result.fail_msg
8774 ms = result.payload # MigrationStatus instance
8775 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8776 logging.error("Instance migration failed, trying to revert"
8777 " disk status: %s", msg)
8778 self.feedback_fn("Migration failed, aborting")
8779 self._AbortMigration()
8780 self._RevertDiskStatus()
8782 msg = "hypervisor returned failure"
8783 raise errors.OpExecError("Could not migrate instance %s: %s" %
8784 (instance.name, msg))
8786 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8787 self.feedback_fn("* memory transfer complete")
8790 if (utils.TimeoutExpired(last_feedback,
8791 self._MIGRATION_FEEDBACK_INTERVAL) and
8792 ms.transferred_ram is not None):
8793 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8794 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8795 last_feedback = time.time()
8797 time.sleep(self._MIGRATION_POLL_INTERVAL)
8799 result = self.rpc.call_instance_finalize_migration_src(source_node,
8800 instance,
8801 True,
8802 self.live)
8803 msg = result.fail_msg
8804 if msg:
8805 logging.error("Instance migration succeeded, but finalization failed"
8806 " on the source node: %s", msg)
8807 raise errors.OpExecError("Could not finalize instance migration: %s" %
8808 msg)
8810 instance.primary_node = target_node
8812 # distribute new instance config to the other nodes
8813 self.cfg.Update(instance, self.feedback_fn)
8815 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8816 instance,
8817 migration_info,
8818 True)
8819 msg = result.fail_msg
8820 if msg:
8821 logging.error("Instance migration succeeded, but finalization failed"
8822 " on the target node: %s", msg)
8823 raise errors.OpExecError("Could not finalize instance migration: %s" %
8824 msg)
8826 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8827 self._EnsureSecondary(source_node)
8828 self._WaitUntilSync()
8829 self._GoStandalone()
8830 self._GoReconnect(False)
8831 self._WaitUntilSync()
8833 # If the instance's disk template is `rbd' and there was a successful
8834 # migration, unmap the device from the source node.
8835 if self.instance.disk_template == constants.DT_RBD:
8836 disks = _ExpandCheckDisks(instance, instance.disks)
8837 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8838 for disk in disks:
8839 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8840 msg = result.fail_msg
8841 if msg:
8842 logging.error("Migration was successful, but couldn't unmap the"
8843 " block device %s on source node %s: %s",
8844 disk.iv_name, source_node, msg)
8845 logging.error("You need to unmap the device %s manually on %s",
8846 disk.iv_name, source_node)
8848 self.feedback_fn("* done")
8850 def _ExecFailover(self):
8851 """Failover an instance.
8853 The failover is done by shutting it down on its present node and
8854 starting it on the secondary.
8857 instance = self.instance
8858 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8860 source_node = instance.primary_node
8861 target_node = self.target_node
8863 if instance.admin_state == constants.ADMINST_UP:
8864 self.feedback_fn("* checking disk consistency between source and target")
8865 for (idx, dev) in enumerate(instance.disks):
8866 # for drbd, these are drbd over lvm
8867 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8868 False):
8869 if primary_node.offline:
8870 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8871 " target node %s" %
8872 (primary_node.name, idx, target_node))
8873 elif not self.ignore_consistency:
8874 raise errors.OpExecError("Disk %s is degraded on target node,"
8875 " aborting failover" % idx)
8877 self.feedback_fn("* not checking disk consistency as instance is not"
8880 self.feedback_fn("* shutting down instance on source node")
8881 logging.info("Shutting down instance %s on node %s",
8882 instance.name, source_node)
8884 result = self.rpc.call_instance_shutdown(source_node, instance,
8885 self.shutdown_timeout)
8886 msg = result.fail_msg
8887 if msg:
8888 if self.ignore_consistency or primary_node.offline:
8889 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8890 " proceeding anyway; please make sure node"
8891 " %s is down; error details: %s",
8892 instance.name, source_node, source_node, msg)
8894 raise errors.OpExecError("Could not shutdown instance %s on"
8896 (instance.name, source_node, msg))
8898 self.feedback_fn("* deactivating the instance's disks on source node")
8899 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8900 raise errors.OpExecError("Can't shut down the instance's disks")
8902 instance.primary_node = target_node
8903 # distribute new instance config to the other nodes
8904 self.cfg.Update(instance, self.feedback_fn)
8906 # Only start the instance if it's marked as up
8907 if instance.admin_state == constants.ADMINST_UP:
8908 self.feedback_fn("* activating the instance's disks on target node %s" %
8910 logging.info("Starting instance %s on node %s",
8911 instance.name, target_node)
8913 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8914 ignore_secondaries=True)
8915 if not disks_ok:
8916 _ShutdownInstanceDisks(self.lu, instance)
8917 raise errors.OpExecError("Can't activate the instance's disks")
8919 self.feedback_fn("* starting the instance on the target node %s" %
8921 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8923 msg = result.fail_msg
8925 _ShutdownInstanceDisks(self.lu, instance)
8926 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8927 (instance.name, target_node, msg))
8929 def Exec(self, feedback_fn):
8930 """Perform the migration.
8933 self.feedback_fn = feedback_fn
8934 self.source_node = self.instance.primary_node
8936 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8937 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8938 self.target_node = self.instance.secondary_nodes[0]
8939 # Otherwise self.target_node has been populated either
8940 # directly, or through an iallocator.
8942 self.all_nodes = [self.source_node, self.target_node]
8943 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8944 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8947 feedback_fn("Failover instance %s" % self.instance.name)
8948 self._ExecFailover()
8950 feedback_fn("Migrating instance %s" % self.instance.name)
8953 return self._ExecCleanup()
8955 return self._ExecMigration()
8958 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8959 force_open):
8960 """Wrapper around L{_CreateBlockDevInner}.
8962 This method annotates the root device first.
8965 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8966 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8967 force_open)
8970 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8971 info, force_open):
8972 """Create a tree of block devices on a given node.
8974 If this device type has to be created on secondaries, create it and
8975 all its children.
8977 If not, just recurse to children keeping the same 'force' value.
8979 @attention: The device has to be annotated already.
8981 @param lu: the lu on whose behalf we execute
8982 @param node: the node on which to create the device
8983 @type instance: L{objects.Instance}
8984 @param instance: the instance which owns the device
8985 @type device: L{objects.Disk}
8986 @param device: the device to create
8987 @type force_create: boolean
8988 @param force_create: whether to force creation of this device; this
8989 will be changed to True whenever we find a device which has
8990 the CreateOnSecondary() attribute
8991 @param info: the extra 'metadata' we should attach to the device
8992 (this will be represented as a LVM tag)
8993 @type force_open: boolean
8994 @param force_open: this parameter will be passed to the
8995 L{backend.BlockdevCreate} function where it specifies
8996 whether we run on primary or not, and it affects both
8997 the child assembly and the device's own Open() execution
9000 if device.CreateOnSecondary():
9001 force_create = True
9003 if device.children:
9004 for child in device.children:
9005 _CreateBlockDevInner(lu, node, instance, child, force_create,
9006 info, force_open)
9008 if not force_create:
9009 return
9011 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
9014 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
9015 """Create a single block device on a given node.
9017 This will not recurse over children of the device, so they must be
9018 created in advance.
9020 @param lu: the lu on whose behalf we execute
9021 @param node: the node on which to create the device
9022 @type instance: L{objects.Instance}
9023 @param instance: the instance which owns the device
9024 @type device: L{objects.Disk}
9025 @param device: the device to create
9026 @param info: the extra 'metadata' we should attach to the device
9027 (this will be represented as a LVM tag)
9028 @type force_open: boolean
9029 @param force_open: this parameter will be passed to the
9030 L{backend.BlockdevCreate} function where it specifies
9031 whether we run on primary or not, and it affects both
9032 the child assembly and the device's own Open() execution
9035 lu.cfg.SetDiskID(device, node)
9036 result = lu.rpc.call_blockdev_create(node, device, device.size,
9037 instance.name, force_open, info)
9038 result.Raise("Can't create block device %s on"
9039 " node %s for instance %s" % (device, node, instance.name))
9040 if device.physical_id is None:
9041 device.physical_id = result.payload
9044 def _GenerateUniqueNames(lu, exts):
9045 """Generate a suitable LV name.
9047 This will generate a logical volume name for the given instance.
9049 """
9050 results = []
9051 for val in exts:
9052 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9053 results.append("%s%s" % (new_id, val))
9055 return results
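# Example (illustrative; the UUIDs are made up): every extension gets its
# own freshly generated unique ID, the results do not share one:
#
#   _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
#   -> ["6b498c9e-....disk0_data", "f2a7e9d0-....disk0_meta"]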
9057 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9058 iv_name, p_minor, s_minor):
9059 """Generate a drbd8 device complete with its children.
9062 assert len(vgnames) == len(names) == 2
9063 port = lu.cfg.AllocatePort()
9064 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9066 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9067 logical_id=(vgnames[0], names[0]),
9068 params={})
9069 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9070 size=constants.DRBD_META_SIZE,
9071 logical_id=(vgnames[1], names[1]),
9072 params={})
9073 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9074 logical_id=(primary, secondary, port,
9075 p_minor, s_minor,
9076 shared_secret),
9077 children=[dev_data, dev_meta],
9078 iv_name=iv_name, params={})
9079 return drbd_dev
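# Illustrative note (not part of Ganeti's API; values are made up): the
# result is a three-node disk tree. The DRBD8 device carries the network
# identity, its two LV children hold the payload and the DRBD metadata:
#
#   drbd_dev  LD_DRBD8  logical_id=(node_a, node_b, 11000, 0, 0, secret)
#   +- dev_data  LD_LV  logical_id=("xenvg", "<uuid>.disk0_data")
#   +- dev_meta  LD_LV  logical_id=("xenvg", "<uuid>.disk0_meta")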
9082 _DISK_TEMPLATE_NAME_PREFIX = {
9083 constants.DT_PLAIN: "",
9084 constants.DT_RBD: ".rbd",
9088 _DISK_TEMPLATE_DEVICE_TYPE = {
9089 constants.DT_PLAIN: constants.LD_LV,
9090 constants.DT_FILE: constants.LD_FILE,
9091 constants.DT_SHARED_FILE: constants.LD_FILE,
9092 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9093 constants.DT_RBD: constants.LD_RBD,
9094 }
9097 def _GenerateDiskTemplate(
9098 lu, template_name, instance_name, primary_node, secondary_nodes,
9099 disk_info, file_storage_dir, file_driver, base_index,
9100 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9101 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9102 """Generate the entire disk layout for a given template type.
9105 # TODO: compute space requirements
9107 vgname = lu.cfg.GetVGName()
9108 disk_count = len(disk_info)
9109 disks = []
9111 if template_name == constants.DT_DISKLESS:
9112 pass
9113 elif template_name == constants.DT_DRBD8:
9114 if len(secondary_nodes) != 1:
9115 raise errors.ProgrammerError("Wrong template configuration")
9116 remote_node = secondary_nodes[0]
9117 minors = lu.cfg.AllocateDRBDMinor(
9118 [primary_node, remote_node] * len(disk_info), instance_name)
9120 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9121 full_disk_params)
9122 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9124 names = []
9125 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9126 for i in range(disk_count)]):
9127 names.append(lv_prefix + "_data")
9128 names.append(lv_prefix + "_meta")
9129 for idx, disk in enumerate(disk_info):
9130 disk_index = idx + base_index
9131 data_vg = disk.get(constants.IDISK_VG, vgname)
9132 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9133 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9134 disk[constants.IDISK_SIZE],
9135 [data_vg, meta_vg],
9136 names[idx * 2:idx * 2 + 2],
9137 "disk/%d" % disk_index,
9138 minors[idx * 2], minors[idx * 2 + 1])
9139 disk_dev.mode = disk[constants.IDISK_MODE]
9140 disks.append(disk_dev)
9141 else:
9142 if secondary_nodes:
9143 raise errors.ProgrammerError("Wrong template configuration")
9145 if template_name == constants.DT_FILE:
9146 _req_file_storage()
9147 elif template_name == constants.DT_SHARED_FILE:
9148 _req_shr_file_storage()
9150 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9151 if name_prefix is None:
9152 names = None
9153 else:
9154 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9155 (name_prefix, base_index + i)
9156 for i in range(disk_count)])
9158 if template_name == constants.DT_PLAIN:
9160 def logical_id_fn(idx, _, disk):
9161 vg = disk.get(constants.IDISK_VG, vgname)
9162 return (vg, names[idx])
9164 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9165 logical_id_fn = \
9166 lambda _, disk_index, disk: (file_driver,
9167 "%s/disk%d" % (file_storage_dir,
9168 disk_index))
9169 elif template_name == constants.DT_BLOCK:
9170 logical_id_fn = \
9171 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9172 disk[constants.IDISK_ADOPT])
9173 elif template_name == constants.DT_RBD:
9174 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9176 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9178 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9180 for idx, disk in enumerate(disk_info):
9181 disk_index = idx + base_index
9182 size = disk[constants.IDISK_SIZE]
9183 feedback_fn("* disk %s, size %s" %
9184 (disk_index, utils.FormatUnit(size, "h")))
9185 disks.append(objects.Disk(dev_type=dev_type, size=size,
9186 logical_id=logical_id_fn(idx, disk_index, disk),
9187 iv_name="disk/%d" % disk_index,
9188 mode=disk[constants.IDISK_MODE],
9189 params={}))
9191 return disks
9194 def _GetInstanceInfoText(instance):
9195 """Compute that text that should be added to the disk's metadata.
9198 return "originstname+%s" % instance.name
9201 def _CalcEta(time_taken, written, total_size):
9202 """Calculates the ETA based on size written and total size.
9204 @param time_taken: The time taken so far
9205 @param written: amount written so far
9206 @param total_size: The total size of data to be written
9207 @return: The remaining time in seconds
9210 avg_time = time_taken / float(written)
9211 return (total_size - written) * avg_time
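# Worked example (illustrative): with 256 MiB written in 8 seconds out of
# 1024 MiB total, the average cost is 8 / 256.0 seconds per MiB, so
#   _CalcEta(8, 256, 1024) == (1024 - 256) * (8 / 256.0) == 24.0
# i.e. 24 seconds remaining.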
9214 def _WipeDisks(lu, instance, disks=None):
9215 """Wipes instance disks.
9217 @type lu: L{LogicalUnit}
9218 @param lu: the logical unit on whose behalf we execute
9219 @type instance: L{objects.Instance}
9220 @param instance: the instance whose disks we should wipe
9221 @return: the success of the wipe
9224 node = instance.primary_node
9226 if disks is None:
9227 disks = [(idx, disk, 0)
9228 for (idx, disk) in enumerate(instance.disks)]
9230 for (_, device, _) in disks:
9231 lu.cfg.SetDiskID(device, node)
9233 logging.info("Pausing synchronization of disks of instance '%s'",
9235 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9236 (map(compat.snd, disks),
9239 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9241 for idx, success in enumerate(result.payload):
9243 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9244 " failed", idx, instance.name)
9246 try:
9247 for (idx, device, offset) in disks:
9248 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9249 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9250 wipe_chunk_size = \
9251 int(min(constants.MAX_WIPE_CHUNK,
9252 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
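# Worked example (illustrative, assuming MAX_WIPE_CHUNK is 1024 MiB and
# MIN_WIPE_CHUNK_PERCENT is 10): a 4096 MiB disk is wiped in
# int(min(1024, 4096 / 100.0 * 10)) = 409 MiB chunks, while a 100 GiB disk
# is capped at 1024 MiB chunks.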
9254 size = device.size
9255 last_output = 0
9256 start_time = time.time()
9258 if offset == 0:
9259 info_text = ""
9260 else:
9261 info_text = (" (from %s to %s)" %
9262 (utils.FormatUnit(offset, "h"),
9263 utils.FormatUnit(size, "h")))
9265 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9267 logging.info("Wiping disk %d for instance %s on node %s using"
9268 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9270 while offset < size:
9271 wipe_size = min(wipe_chunk_size, size - offset)
9273 logging.debug("Wiping disk %d, offset %s, chunk %s",
9274 idx, offset, wipe_size)
9276 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9278 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9279 (idx, offset, wipe_size))
9281 now = time.time()
9282 offset += wipe_size
9283 if now - last_output >= 60:
9284 eta = _CalcEta(now - start_time, offset, size)
9285 lu.LogInfo(" - done: %.1f%% ETA: %s",
9286 offset / float(size) * 100, utils.FormatSeconds(eta))
9287 last_output = now
9289 logging.info("Resuming synchronization of disks for instance '%s'",
9292 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9293 (map(compat.snd, disks),
9298 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9299 node, result.fail_msg)
9301 for idx, success in enumerate(result.payload):
9303 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9304 " failed", idx, instance.name)
9307 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9308 """Create all disks for an instance.
9310 This abstracts away some work from AddInstance.
9312 @type lu: L{LogicalUnit}
9313 @param lu: the logical unit on whose behalf we execute
9314 @type instance: L{objects.Instance}
9315 @param instance: the instance whose disks we should create
9317 @param to_skip: list of indices to skip
9318 @type target_node: string
9319 @param target_node: if passed, overrides the target node for creation
9321 @return: the success of the creation
9324 info = _GetInstanceInfoText(instance)
9325 if target_node is None:
9326 pnode = instance.primary_node
9327 all_nodes = instance.all_nodes
9332 if instance.disk_template in constants.DTS_FILEBASED:
9333 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9334 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9336 result.Raise("Failed to create directory '%s' on"
9337 " node %s" % (file_storage_dir, pnode))
9339 # Note: this needs to be kept in sync with adding of disks in
9340 # LUInstanceSetParams
9341 for idx, device in enumerate(instance.disks):
9342 if to_skip and idx in to_skip:
9343 continue
9344 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9346 for node in all_nodes:
9347 f_create = node == pnode
9348 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9351 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9352 """Remove all disks for an instance.
9354 This abstracts away some work from `AddInstance()` and
9355 `RemoveInstance()`. Note that in case some of the devices couldn't
9356 be removed, the removal will continue with the other ones (compare
9357 with `_CreateDisks()`).
9359 @type lu: L{LogicalUnit}
9360 @param lu: the logical unit on whose behalf we execute
9361 @type instance: L{objects.Instance}
9362 @param instance: the instance whose disks we should remove
9363 @type target_node: string
9364 @param target_node: used to override the node on which to remove the disks
9366 @return: the success of the removal
9369 logging.info("Removing block devices for instance %s", instance.name)
9371 all_result = True
9372 ports_to_release = set()
9373 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9374 for (idx, device) in enumerate(anno_disks):
9375 if target_node:
9376 edata = [(target_node, device)]
9377 else:
9378 edata = device.ComputeNodeTree(instance.primary_node)
9379 for node, disk in edata:
9380 lu.cfg.SetDiskID(disk, node)
9381 result = lu.rpc.call_blockdev_remove(node, disk)
9382 if result.fail_msg:
9383 lu.LogWarning("Could not remove disk %s on node %s,"
9384 " continuing anyway: %s", idx, node, result.fail_msg)
9385 if not (result.offline and node != instance.primary_node):
9386 all_result = False
9388 # if this is a DRBD disk, return its port to the pool
9389 if device.dev_type in constants.LDS_DRBD:
9390 ports_to_release.add(device.logical_id[2])
9392 if all_result or ignore_failures:
9393 for port in ports_to_release:
9394 lu.cfg.AddTcpUdpPort(port)
9396 if instance.disk_template in constants.DTS_FILEBASED:
9397 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9401 tgt = instance.primary_node
9402 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9404 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9405 file_storage_dir, instance.primary_node, result.fail_msg)
9411 def _ComputeDiskSizePerVG(disk_template, disks):
9412 """Compute disk size requirements in the volume group
9415 def _compute(disks, payload):
9416 """Universal algorithm.
9419 vgs = {}
9420 for disk in disks:
9421 vgs[disk[constants.IDISK_VG]] = \
9422 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9423 return vgs
9426 # Required free disk space as a function of disk and swap space
9427 req_size_dict = {
9428 constants.DT_DISKLESS: {},
9429 constants.DT_PLAIN: _compute(disks, 0),
9430 # 128 MB are added for drbd metadata for each disk
9431 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9432 constants.DT_FILE: {},
9433 constants.DT_SHARED_FILE: {},
9434 }
9436 if disk_template not in req_size_dict:
9437 raise errors.ProgrammerError("Disk template '%s' size requirement"
9438 " is unknown" % disk_template)
9440 return req_size_dict[disk_template]
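# Worked example (hypothetical input): for constants.DT_DRBD8 and two disks
# of 10240 and 2048 MiB on volume group "xenvg", _compute adds
# constants.DRBD_META_SIZE (128 MB, see the comment above) per disk, so the
# function returns {"xenvg": (10240 + 128) + (2048 + 128)} = {"xenvg": 12544}.
# Diskless and file-based templates consume no VG space, hence the empty dicts.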
9443 def _FilterVmNodes(lu, nodenames):
9444 """Filters out non-vm_capable nodes from a list.
9446 @type lu: L{LogicalUnit}
9447 @param lu: the logical unit for which we check
9448 @type nodenames: list
9449 @param nodenames: the list of nodes on which we should check
9451 @return: the list of vm-capable nodes
9454 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9455 return [name for name in nodenames if name not in non_vm_nodes]
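# Illustrative example: with nodenames ["node1", "node2", "node3"] and only
# "node2" marked as not vm_capable, this returns ["node1", "node3"],
# preserving the input order.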
9458 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9459 """Hypervisor parameter validation.
9461 This function abstract the hypervisor parameter validation to be
9462 used in both instance create and instance modify.
9464 @type lu: L{LogicalUnit}
9465 @param lu: the logical unit for which we check
9466 @type nodenames: list
9467 @param nodenames: the list of nodes on which we should check
9468 @type hvname: string
9469 @param hvname: the name of the hypervisor we should use
9470 @type hvparams: dict
9471 @param hvparams: the parameters which we need to check
9472 @raise errors.OpPrereqError: if the parameters are not valid
9475 nodenames = _FilterVmNodes(lu, nodenames)
9477 cluster = lu.cfg.GetClusterInfo()
9478 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9480 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9481 for node in nodenames:
9482 info = hvinfo[node]
9483 if info.offline:
9484 continue
9485 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9488 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9489 """OS parameters validation.
9491 @type lu: L{LogicalUnit}
9492 @param lu: the logical unit for which we check
9493 @type required: boolean
9494 @param required: whether the validation should fail if the OS is not
9495 found
9496 @type nodenames: list
9497 @param nodenames: the list of nodes on which we should check
9498 @type osname: string
9499 @param osname: the name of the OS we should use
9500 @type osparams: dict
9501 @param osparams: the parameters which we need to check
9502 @raise errors.OpPrereqError: if the parameters are not valid
9505 nodenames = _FilterVmNodes(lu, nodenames)
9506 result = lu.rpc.call_os_validate(nodenames, required, osname,
9507 [constants.OS_VALIDATE_PARAMETERS],
9508 osparams)
9509 for node, nres in result.items():
9510 # we don't check for offline cases since this should be run only
9511 # against the master node and/or an instance's nodes
9512 nres.Raise("OS Parameters validation failed on node %s" % node)
9513 if not nres.payload:
9514 lu.LogInfo("OS %s not found on node %s, validation skipped",
9518 def _CreateInstanceAllocRequest(op, disks, nics, beparams):
9519 """Wrapper around IAReqInstanceAlloc.
9521 @param op: The instance opcode
9522 @param disks: The computed disks
9523 @param nics: The computed nics
9524 @param beparams: The fully filled beparams
9526 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9529 spindle_use = beparams[constants.BE_SPINDLE_USE]
9530 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9531 disk_template=op.disk_template,
9532 tags=op.tags,
9533 os=op.os_type,
9534 vcpus=beparams[constants.BE_VCPUS],
9535 memory=beparams[constants.BE_MAXMEM],
9536 spindle_use=spindle_use,
9537 disks=disks,
9538 nics=[n.ToDict() for n in nics],
9539 hypervisor=op.hypervisor)
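# Usage sketch (hypothetical opcode): both LUInstanceCreate._RunAllocator and
# LUInstanceMultiAlloc.CheckPrereq below call this with disks/nics from
# _ComputeDisks/_ComputeNics and beparams from _ComputeFullBeParams, so that
# BE_VCPUS, BE_MAXMEM and BE_SPINDLE_USE are guaranteed to be present when
# the request is built.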
9542 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9543 """Computes the nics.
9545 @param op: The instance opcode
9546 @param cluster: Cluster configuration object
9547 @param default_ip: The default ip to assign
9548 @param cfg: An instance of the configuration object
9549 @param ec_id: Execution context ID
9551 @returns: The computed NICs, as L{objects.NIC} objects
9554 nics = []
9555 for nic in op.nics:
9556 nic_mode_req = nic.get(constants.INIC_MODE, None)
9557 nic_mode = nic_mode_req
9558 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9559 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9561 net = nic.get(constants.INIC_NETWORK, None)
9562 link = nic.get(constants.NIC_LINK, None)
9563 ip = nic.get(constants.INIC_IP, None)
9565 if net is None or net.lower() == constants.VALUE_NONE:
9566 net = None
9567 else:
9568 if nic_mode_req is not None or link is not None:
9569 raise errors.OpPrereqError("If network is given, no mode or link"
9570 " is allowed to be passed",
9573 # ip validity checks
9574 if ip is None or ip.lower() == constants.VALUE_NONE:
9575 nic_ip = None
9576 elif ip.lower() == constants.VALUE_AUTO:
9577 if not op.name_check:
9578 raise errors.OpPrereqError("IP address set to auto but name checks"
9579 " have been skipped",
9583 # We defer pool operations until later, so that the iallocator has
9584 # filled in the instance's node(s)
9585 if ip.lower() == constants.NIC_IP_POOL:
9586 if net is None:
9587 raise errors.OpPrereqError("if ip=pool, parameter network"
9588 " must be passed too",
9591 elif not netutils.IPAddress.IsValid(ip):
9592 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9597 # TODO: check the ip address for uniqueness
9598 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9599 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9602 # MAC address verification
9603 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9604 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9605 mac = utils.NormalizeAndValidateMac(mac)
9607 try:
9608 # TODO: We need to factor this out
9609 cfg.ReserveMAC(mac, ec_id)
9610 except errors.ReservationError:
9611 raise errors.OpPrereqError("MAC address %s already in use"
9612 " in cluster" % mac,
9613 errors.ECODE_NOTUNIQUE)
9615 # Build nic parameters
9616 nicparams = {}
9617 if nic_mode_req:
9618 nicparams[constants.NIC_MODE] = nic_mode
9619 if link:
9620 nicparams[constants.NIC_LINK] = link
9622 check_params = cluster.SimpleFillNIC(nicparams)
9623 objects.NIC.CheckParameterSyntax(check_params)
9624 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9625 network=net, nicparams=nicparams))
9627 return nics
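# Illustrative inputs (hypothetical): a nic dict {constants.INIC_IP: "auto"}
# with name checks enabled yields an objects.NIC whose ip is default_ip and
# whose mode/link come from the cluster nicparams defaults, while
# {constants.INIC_IP: "pool", constants.INIC_NETWORK: "net1"} keeps the
# literal "pool" value here; it is only replaced by an address from the
# network's pool later, in LUInstanceCreate.CheckPrereq.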
9630 def _ComputeDisks(op, default_vg):
9631 """Computes the instance disks.
9633 @param op: The instance opcode
9634 @param default_vg: The default_vg to assume
9636 @return: The computed disks, as a list of parameter dictionaries
9639 disks = []
9640 for disk in op.disks:
9641 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9642 if mode not in constants.DISK_ACCESS_SET:
9643 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9644 mode, errors.ECODE_INVAL)
9645 size = disk.get(constants.IDISK_SIZE, None)
9646 if size is None:
9647 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9648 try:
9649 size = int(size)
9650 except (TypeError, ValueError):
9651 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9654 data_vg = disk.get(constants.IDISK_VG, default_vg)
9655 new_disk = {
9656 constants.IDISK_SIZE: size,
9657 constants.IDISK_MODE: mode,
9658 constants.IDISK_VG: data_vg,
9659 }
9660 if constants.IDISK_METAVG in disk:
9661 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9662 if constants.IDISK_ADOPT in disk:
9663 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9664 disks.append(new_disk)
9666 return disks
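# Worked example (hypothetical input): with default_vg "xenvg" and
# op.disks = [{constants.IDISK_SIZE: 10240}], the result is
# [{constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR,
# constants.IDISK_VG: "xenvg"}]: the mode defaults to read-write and the
# volume group to the cluster default, while IDISK_METAVG and IDISK_ADOPT
# are only copied through when explicitly given.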
9669 def _ComputeFullBeParams(op, cluster):
9670 """Computes the full beparams.
9672 @param op: The instance opcode
9673 @param cluster: The cluster config object
9675 @return: The fully filled beparams
9678 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9679 for param, value in op.beparams.iteritems():
9680 if value == constants.VALUE_AUTO:
9681 op.beparams[param] = default_beparams[param]
9682 objects.UpgradeBeParams(op.beparams)
9683 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9684 return cluster.SimpleFillBE(op.beparams)
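# Illustrative example: with op.beparams = {BE_VCPUS: VALUE_AUTO,
# BE_MAXMEM: 2048}, the "auto" value is first replaced by the cluster
# default, the dict is upgraded from the legacy single "memory" parameter if
# necessary, type-checked, and then merged over the cluster defaults by
# SimpleFillBE, so the result contains a value for every backend parameter.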
9687 class LUInstanceCreate(LogicalUnit):
9688 """Create an instance.
9691 HPATH = "instance-add"
9692 HTYPE = constants.HTYPE_INSTANCE
9695 def CheckArguments(self):
9699 # do not require name_check to ease forward/backward compatibility
9701 if self.op.no_install and self.op.start:
9702 self.LogInfo("No-installation mode selected, disabling startup")
9703 self.op.start = False
9704 # validate/normalize the instance name
9705 self.op.instance_name = \
9706 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9708 if self.op.ip_check and not self.op.name_check:
9709 # TODO: make the ip check more flexible and not depend on the name check
9710 raise errors.OpPrereqError("Cannot do IP address check without a name"
9711 " check", errors.ECODE_INVAL)
9713 # check nics' parameter names
9714 for nic in self.op.nics:
9715 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9717 # check disks. parameter names and consistent adopt/no-adopt strategy
9718 has_adopt = has_no_adopt = False
9719 for disk in self.op.disks:
9720 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9721 if constants.IDISK_ADOPT in disk:
9722 has_adopt = True
9723 else:
9724 has_no_adopt = True
9725 if has_adopt and has_no_adopt:
9726 raise errors.OpPrereqError("Either all disks are adopted or none is",
9729 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9730 raise errors.OpPrereqError("Disk adoption is not supported for the"
9731 " '%s' disk template" %
9732 self.op.disk_template,
9734 if self.op.iallocator is not None:
9735 raise errors.OpPrereqError("Disk adoption not allowed with an"
9736 " iallocator script", errors.ECODE_INVAL)
9737 if self.op.mode == constants.INSTANCE_IMPORT:
9738 raise errors.OpPrereqError("Disk adoption not allowed for"
9739 " instance import", errors.ECODE_INVAL)
9741 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9742 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9743 " but no 'adopt' parameter given" %
9744 self.op.disk_template,
9747 self.adopt_disks = has_adopt
9749 # instance name verification
9750 if self.op.name_check:
9751 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9752 self.op.instance_name = self.hostname1.name
9753 # used in CheckPrereq for ip ping check
9754 self.check_ip = self.hostname1.ip
9755 else:
9756 self.check_ip = None
9758 # file storage checks
9759 if (self.op.file_driver and
9760 not self.op.file_driver in constants.FILE_DRIVER):
9761 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9762 self.op.file_driver, errors.ECODE_INVAL)
9764 if self.op.disk_template == constants.DT_FILE:
9765 opcodes.RequireFileStorage()
9766 elif self.op.disk_template == constants.DT_SHARED_FILE:
9767 opcodes.RequireSharedFileStorage()
9769 ### Node/iallocator related checks
9770 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9772 if self.op.pnode is not None:
9773 if self.op.disk_template in constants.DTS_INT_MIRROR:
9774 if self.op.snode is None:
9775 raise errors.OpPrereqError("The networked disk templates need"
9776 " a mirror node", errors.ECODE_INVAL)
9778 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9780 self.op.snode = None
9782 self._cds = _GetClusterDomainSecret()
9784 if self.op.mode == constants.INSTANCE_IMPORT:
9785 # On import force_variant must be True, because if we forced it at
9786 # initial install, our only chance when importing it back is that it
9788 self.op.force_variant = True
9790 if self.op.no_install:
9791 self.LogInfo("No-installation mode has no effect during import")
9793 elif self.op.mode == constants.INSTANCE_CREATE:
9794 if self.op.os_type is None:
9795 raise errors.OpPrereqError("No guest OS specified",
9797 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9798 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9799 " installation" % self.op.os_type,
9801 if self.op.disk_template is None:
9802 raise errors.OpPrereqError("No disk template specified",
9805 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9806 # Check handshake to ensure both clusters have the same domain secret
9807 src_handshake = self.op.source_handshake
9808 if not src_handshake:
9809 raise errors.OpPrereqError("Missing source handshake",
9812 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9813 src_handshake)
9814 if errmsg:
9815 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9818 # Load and check source CA
9819 self.source_x509_ca_pem = self.op.source_x509_ca
9820 if not self.source_x509_ca_pem:
9821 raise errors.OpPrereqError("Missing source X509 CA",
9824 try:
9825 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9826 self._cds)
9827 except OpenSSL.crypto.Error, err:
9828 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9829 (err, ), errors.ECODE_INVAL)
9831 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9832 if errcode is not None:
9833 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9836 self.source_x509_ca = cert
9838 src_instance_name = self.op.source_instance_name
9839 if not src_instance_name:
9840 raise errors.OpPrereqError("Missing source instance name",
9843 self.source_instance_name = \
9844 netutils.GetHostname(name=src_instance_name).name
9847 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9848 self.op.mode, errors.ECODE_INVAL)
9850 def ExpandNames(self):
9851 """ExpandNames for CreateInstance.
9853 Figure out the right locks for instance creation.
9856 self.needed_locks = {}
9858 instance_name = self.op.instance_name
9859 # this is just a preventive check, but someone might still add this
9860 # instance in the meantime, and creation will fail at lock-add time
9861 if instance_name in self.cfg.GetInstanceList():
9862 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9863 instance_name, errors.ECODE_EXISTS)
9865 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9867 if self.op.iallocator:
9868 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9869 # specifying a group on instance creation and then selecting nodes from
9871 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9872 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
9874 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9875 nodelist = [self.op.pnode]
9876 if self.op.snode is not None:
9877 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9878 nodelist.append(self.op.snode)
9879 self.needed_locks[locking.LEVEL_NODE] = nodelist
9881 # in case of import lock the source node too
9882 if self.op.mode == constants.INSTANCE_IMPORT:
9883 src_node = self.op.src_node
9884 src_path = self.op.src_path
9886 if src_path is None:
9887 self.op.src_path = src_path = self.op.instance_name
9889 if src_node is None:
9890 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9891 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
9892 self.op.src_node = None
9893 if os.path.isabs(src_path):
9894 raise errors.OpPrereqError("Importing an instance from a path"
9895 " requires a source node option",
9896 errors.ECODE_INVAL)
9897 else:
9898 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9899 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9900 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9901 if not os.path.isabs(src_path):
9902 self.op.src_path = src_path = \
9903 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9905 self.needed_locks[locking.LEVEL_NODE_RES] = \
9906 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
9908 def _RunAllocator(self):
9909 """Run the allocator based on input opcode.
9912 #TODO Export network to iallocator so that it chooses a pnode
9913 # in a nodegroup that has the desired network connected to
9914 req = _CreateInstanceAllocRequest(self.op, self.disks,
9915 self.nics, self.be_full)
9916 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9918 ial.Run(self.op.iallocator)
9921 raise errors.OpPrereqError("Can't compute nodes using"
9922 " iallocator '%s': %s" %
9923 (self.op.iallocator, ial.info),
9924 errors.ECODE_NORES)
9925 self.op.pnode = ial.result[0]
9926 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9927 self.op.instance_name, self.op.iallocator,
9928 utils.CommaJoin(ial.result))
9930 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9932 if req.RequiredNodes() == 2:
9933 self.op.snode = ial.result[1]
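# Illustrative result handling (hypothetical names): for a DRBD instance the
# iallocator returns two node names, e.g. ["node2", "node4"], which the code
# above assigns to pnode and snode respectively; for non-mirrored templates
# ial.result holds a single name and snode stays unset.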
9935 def BuildHooksEnv(self):
9938 This runs on master, primary and secondary nodes of the instance.
9942 "ADD_MODE": self.op.mode,
9944 if self.op.mode == constants.INSTANCE_IMPORT:
9945 env["SRC_NODE"] = self.op.src_node
9946 env["SRC_PATH"] = self.op.src_path
9947 env["SRC_IMAGES"] = self.src_images
9949 env.update(_BuildInstanceHookEnv(
9950 name=self.op.instance_name,
9951 primary_node=self.op.pnode,
9952 secondary_nodes=self.secondaries,
9953 status=self.op.start,
9954 os_type=self.op.os_type,
9955 minmem=self.be_full[constants.BE_MINMEM],
9956 maxmem=self.be_full[constants.BE_MAXMEM],
9957 vcpus=self.be_full[constants.BE_VCPUS],
9958 nics=_NICListToTuple(self, self.nics),
9959 disk_template=self.op.disk_template,
9960 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9961 for d in self.disks],
9964 hypervisor_name=self.op.hypervisor,
9970 def BuildHooksNodes(self):
9971 """Build hooks nodes.
9974 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9976 return (nl, nl)
9977 def _ReadExportInfo(self):
9978 """Reads the export information from disk.
9980 It will override the opcode source node and path with the actual
9981 information, if these two were not specified before.
9983 @return: the export information
9986 assert self.op.mode == constants.INSTANCE_IMPORT
9988 src_node = self.op.src_node
9989 src_path = self.op.src_path
9991 if src_node is None:
9992 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9993 exp_list = self.rpc.call_export_list(locked_nodes)
9995 for node in exp_list:
9996 if exp_list[node].fail_msg:
9997 continue
9998 if src_path in exp_list[node].payload:
10000 self.op.src_node = src_node = node
10001 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10002 src_path)
10003 break
10005 raise errors.OpPrereqError("No export found for relative path %s" %
10006 src_path, errors.ECODE_INVAL)
10008 _CheckNodeOnline(self, src_node)
10009 result = self.rpc.call_export_info(src_node, src_path)
10010 result.Raise("No export or invalid export found in dir %s" % src_path)
10012 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10013 if not export_info.has_section(constants.INISECT_EXP):
10014 raise errors.ProgrammerError("Corrupted export config",
10015 errors.ECODE_ENVIRON)
10017 ei_version = export_info.get(constants.INISECT_EXP, "version")
10018 if (int(ei_version) != constants.EXPORT_VERSION):
10019 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10020 (ei_version, constants.EXPORT_VERSION),
10021 errors.ECODE_ENVIRON)
10023 return export_info
10024 def _ReadExportParams(self, einfo):
10025 """Use export parameters as defaults.
10027 In case the opcode doesn't specify (as in override) some instance
10028 parameters, then try to use them from the export information, if
10029 that declares them.
10032 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10034 if self.op.disk_template is None:
10035 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10036 self.op.disk_template = einfo.get(constants.INISECT_INS,
10038 if self.op.disk_template not in constants.DISK_TEMPLATES:
10039 raise errors.OpPrereqError("Disk template specified in configuration"
10040 " file is not one of the allowed values:"
10042 " ".join(constants.DISK_TEMPLATES),
10043 errors.ECODE_INVAL)
10045 raise errors.OpPrereqError("No disk template specified and the export"
10046 " is missing the disk_template information",
10047 errors.ECODE_INVAL)
10049 if not self.op.disks:
10050 disks = []
10051 # TODO: import the disk iv_name too
10052 for idx in range(constants.MAX_DISKS):
10053 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10054 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10055 disks.append({constants.IDISK_SIZE: disk_sz})
10056 self.op.disks = disks
10057 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10058 raise errors.OpPrereqError("No disk info specified and the export"
10059 " is missing the disk information",
10060 errors.ECODE_INVAL)
10062 if not self.op.nics:
10063 nics = []
10064 for idx in range(constants.MAX_NICS):
10065 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10066 ndict = {}
10067 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10068 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10069 ndict[name] = v
10070 nics.append(ndict)
10071 else:
10072 break
10073 self.op.nics = nics
10075 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10076 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10078 if (self.op.hypervisor is None and
10079 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10080 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10082 if einfo.has_section(constants.INISECT_HYP):
10083 # use the export parameters but do not override the ones
10084 # specified by the user
10085 for name, value in einfo.items(constants.INISECT_HYP):
10086 if name not in self.op.hvparams:
10087 self.op.hvparams[name] = value
10089 if einfo.has_section(constants.INISECT_BEP):
10090 # use the parameters, without overriding
10091 for name, value in einfo.items(constants.INISECT_BEP):
10092 if name not in self.op.beparams:
10093 self.op.beparams[name] = value
10094 # Compatibility for the old "memory" be param
10095 if name == constants.BE_MEMORY:
10096 if constants.BE_MAXMEM not in self.op.beparams:
10097 self.op.beparams[constants.BE_MAXMEM] = value
10098 if constants.BE_MINMEM not in self.op.beparams:
10099 self.op.beparams[constants.BE_MINMEM] = value
10101 # try to read the parameters old style, from the main section
10102 for name in constants.BES_PARAMETERS:
10103 if (name not in self.op.beparams and
10104 einfo.has_option(constants.INISECT_INS, name)):
10105 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10107 if einfo.has_section(constants.INISECT_OSP):
10108 # use the parameters, without overriding
10109 for name, value in einfo.items(constants.INISECT_OSP):
10110 if name not in self.op.osparams:
10111 self.op.osparams[name] = value
10113 def _RevertToDefaults(self, cluster):
10114 """Revert the instance parameters to the default values.
10118 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10119 for name in self.op.hvparams.keys():
10120 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10121 del self.op.hvparams[name]
10123 be_defs = cluster.SimpleFillBE({})
10124 for name in self.op.beparams.keys():
10125 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10126 del self.op.beparams[name]
10128 nic_defs = cluster.SimpleFillNIC({})
10129 for nic in self.op.nics:
10130 for name in constants.NICS_PARAMETERS:
10131 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10132 del nic[name]
10134 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10135 for name in self.op.osparams.keys():
10136 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10137 del self.op.osparams[name]
10139 def _CalculateFileStorageDir(self):
10140 """Calculate final instance file storage dir.
10143 # file storage dir calculation/check
10144 self.instance_file_storage_dir = None
10145 if self.op.disk_template in constants.DTS_FILEBASED:
10146 # build the full file storage dir path
10147 joinargs = []
10149 if self.op.disk_template == constants.DT_SHARED_FILE:
10150 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10151 else:
10152 get_fsd_fn = self.cfg.GetFileStorageDir
10154 cfg_storagedir = get_fsd_fn()
10155 if not cfg_storagedir:
10156 raise errors.OpPrereqError("Cluster file storage dir not defined",
10157 errors.ECODE_STATE)
10158 joinargs.append(cfg_storagedir)
10160 if self.op.file_storage_dir is not None:
10161 joinargs.append(self.op.file_storage_dir)
10163 joinargs.append(self.op.instance_name)
10165 # pylint: disable=W0142
10166 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
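# Illustrative example (hypothetical paths): with a cluster file storage dir
# of "/srv/ganeti/file-storage", op.file_storage_dir "web" and instance name
# "inst1.example.com", the disks of the instance end up under
# "/srv/ganeti/file-storage/web/inst1.example.com".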
10168 def CheckPrereq(self): # pylint: disable=R0914
10169 """Check prerequisites.
10172 self._CalculateFileStorageDir()
10174 if self.op.mode == constants.INSTANCE_IMPORT:
10175 export_info = self._ReadExportInfo()
10176 self._ReadExportParams(export_info)
10177 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10178 else:
10179 self._old_instance_name = None
10181 if (not self.cfg.GetVGName() and
10182 self.op.disk_template not in constants.DTS_NOT_LVM):
10183 raise errors.OpPrereqError("Cluster does not support lvm-based"
10184 " instances", errors.ECODE_STATE)
10186 if (self.op.hypervisor is None or
10187 self.op.hypervisor == constants.VALUE_AUTO):
10188 self.op.hypervisor = self.cfg.GetHypervisorType()
10190 cluster = self.cfg.GetClusterInfo()
10191 enabled_hvs = cluster.enabled_hypervisors
10192 if self.op.hypervisor not in enabled_hvs:
10193 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10195 (self.op.hypervisor, ",".join(enabled_hvs)),
10196 errors.ECODE_STATE)
10198 # Check tag validity
10199 for tag in self.op.tags:
10200 objects.TaggableObject.ValidateTag(tag)
10202 # check hypervisor parameter syntax (locally)
10203 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10204 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10205 self.op.hvparams)
10206 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10207 hv_type.CheckParameterSyntax(filled_hvp)
10208 self.hv_full = filled_hvp
10209 # check that we don't specify global parameters on an instance
10210 _CheckGlobalHvParams(self.op.hvparams)
10212 # fill and remember the beparams dict
10213 self.be_full = _ComputeFullBeParams(self.op, cluster)
10215 # build os parameters
10216 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10218 # now that hvp/bep are in final format, let's reset to defaults,
10220 if self.op.identify_defaults:
10221 self._RevertToDefaults(cluster)
10224 self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
10225 self.proc.GetECId())
10227 # disk checks/pre-build
10228 default_vg = self.cfg.GetVGName()
10229 self.disks = _ComputeDisks(self.op, default_vg)
10231 if self.op.mode == constants.INSTANCE_IMPORT:
10232 disk_images = []
10233 for idx in range(len(self.disks)):
10234 option = "disk%d_dump" % idx
10235 if export_info.has_option(constants.INISECT_INS, option):
10236 # FIXME: are the old os-es, disk sizes, etc. useful?
10237 export_name = export_info.get(constants.INISECT_INS, option)
10238 image = utils.PathJoin(self.op.src_path, export_name)
10239 disk_images.append(image)
10240 else:
10241 disk_images.append(False)
10243 self.src_images = disk_images
10245 if self.op.instance_name == self._old_instance_name:
10246 for idx, nic in enumerate(self.nics):
10247 if nic.mac == constants.VALUE_AUTO:
10248 nic_mac_ini = "nic%d_mac" % idx
10249 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10251 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10253 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10254 if self.op.ip_check:
10255 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10256 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10257 (self.check_ip, self.op.instance_name),
10258 errors.ECODE_NOTUNIQUE)
10260 #### mac address generation
10261 # By generating here the mac address both the allocator and the hooks get
10262 # the real final mac address rather than the 'auto' or 'generate' value.
10263 # There is a race condition between the generation and the instance object
10264 # creation, which means that we know the mac is valid now, but we're not
10265 # sure it will be when we actually add the instance. If things go bad
10266 # adding the instance will abort because of a duplicate mac, and the
10267 # creation job will fail.
10268 for nic in self.nics:
10269 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10270 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10274 if self.op.iallocator is not None:
10275 self._RunAllocator()
10277 # Release all unneeded node locks
10278 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10279 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10280 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10281 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10283 assert (self.owned_locks(locking.LEVEL_NODE) ==
10284 self.owned_locks(locking.LEVEL_NODE_RES)), \
10285 "Node locks differ from node resource locks"
10287 #### node related checks
10289 # check primary node
10290 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10291 assert self.pnode is not None, \
10292 "Cannot retrieve locked node %s" % self.op.pnode
10294 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10295 pnode.name, errors.ECODE_STATE)
10297 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10298 pnode.name, errors.ECODE_STATE)
10299 if not pnode.vm_capable:
10300 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10301 " '%s'" % pnode.name, errors.ECODE_STATE)
10303 self.secondaries = []
10305 # Fill in any IPs from IP pools. This must happen here, because we need to
10306 # know the nic's primary node, as specified by the iallocator
10307 for idx, nic in enumerate(self.nics):
10308 net = nic.network
10309 if net is not None:
10310 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10311 if netparams is None:
10312 raise errors.OpPrereqError("No netparams found for network"
10313 " %s. Propably not connected to"
10314 " node's %s nodegroup" %
10315 (net, self.pnode.name),
10316 errors.ECODE_INVAL)
10317 self.LogInfo("NIC/%d inherits netparams %s" %
10318 (idx, netparams.values()))
10319 nic.nicparams = dict(netparams)
10320 if nic.ip is not None:
10321 if nic.ip.lower() == constants.NIC_IP_POOL:
10322 try:
10323 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10324 except errors.ReservationError:
10325 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10326 " from the address pool" % idx,
10327 errors.ECODE_STATE)
10328 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10329 else:
10330 try:
10331 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10332 except errors.ReservationError:
10333 raise errors.OpPrereqError("IP address %s already in use"
10334 " or does not belong to network %s" %
10335 (nic.ip, net),
10336 errors.ECODE_NOTUNIQUE)
10338 # net is None, ip None or given
10339 if self.op.conflicts_check:
10340 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10342 # mirror node verification
10343 if self.op.disk_template in constants.DTS_INT_MIRROR:
10344 if self.op.snode == pnode.name:
10345 raise errors.OpPrereqError("The secondary node cannot be the"
10346 " primary node", errors.ECODE_INVAL)
10347 _CheckNodeOnline(self, self.op.snode)
10348 _CheckNodeNotDrained(self, self.op.snode)
10349 _CheckNodeVmCapable(self, self.op.snode)
10350 self.secondaries.append(self.op.snode)
10352 snode = self.cfg.GetNodeInfo(self.op.snode)
10353 if pnode.group != snode.group:
10354 self.LogWarning("The primary and secondary nodes are in two"
10355 " different node groups; the disk parameters"
10356 " from the first disk's node group will be"
10359 nodenames = [pnode.name] + self.secondaries
10361 # Verify instance specs
10362 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10363 ispec = {
10364 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10365 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10366 constants.ISPEC_DISK_COUNT: len(self.disks),
10367 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10368 constants.ISPEC_NIC_COUNT: len(self.nics),
10369 constants.ISPEC_SPINDLE_USE: spindle_use,
10370 }
10372 group_info = self.cfg.GetNodeGroup(pnode.group)
10373 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10374 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10375 if not self.op.ignore_ipolicy and res:
10376 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10377 (pnode.group, group_info.name, utils.CommaJoin(res)))
10378 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10380 if not self.adopt_disks:
10381 if self.op.disk_template == constants.DT_RBD:
10382 # _CheckRADOSFreeSpace() is just a placeholder.
10383 # Any function that checks prerequisites can be placed here.
10384 # Check if there is enough space on the RADOS cluster.
10385 _CheckRADOSFreeSpace()
10387 # Check lv size requirements, if not adopting
10388 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10389 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10391 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10392 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10393 disk[constants.IDISK_ADOPT])
10394 for disk in self.disks])
10395 if len(all_lvs) != len(self.disks):
10396 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10397 errors.ECODE_INVAL)
10398 for lv_name in all_lvs:
10399 try:
10400 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10401 # to ReserveLV uses the same syntax
10402 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10403 except errors.ReservationError:
10404 raise errors.OpPrereqError("LV named %s used by another instance" %
10405 lv_name, errors.ECODE_NOTUNIQUE)
10407 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10408 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10410 node_lvs = self.rpc.call_lv_list([pnode.name],
10411 vg_names.payload.keys())[pnode.name]
10412 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10413 node_lvs = node_lvs.payload
10415 delta = all_lvs.difference(node_lvs.keys())
10417 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10418 utils.CommaJoin(delta),
10419 errors.ECODE_INVAL)
10420 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10422 raise errors.OpPrereqError("Online logical volumes found, cannot"
10423 " adopt: %s" % utils.CommaJoin(online_lvs),
10424 errors.ECODE_STATE)
10425 # update the size of disk based on what is found
10426 for dsk in self.disks:
10427 dsk[constants.IDISK_SIZE] = \
10428 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10429 dsk[constants.IDISK_ADOPT])][0]))
10431 elif self.op.disk_template == constants.DT_BLOCK:
10432 # Normalize and de-duplicate device paths
10433 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10434 for disk in self.disks])
10435 if len(all_disks) != len(self.disks):
10436 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10437 errors.ECODE_INVAL)
10438 baddisks = [d for d in all_disks
10439 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10441 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10442 " cannot be adopted" %
10443 (utils.CommaJoin(baddisks),
10444 constants.ADOPTABLE_BLOCKDEV_ROOT),
10445 errors.ECODE_INVAL)
10447 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10448 list(all_disks))[pnode.name]
10449 node_disks.Raise("Cannot get block device information from node %s" %
10451 node_disks = node_disks.payload
10452 delta = all_disks.difference(node_disks.keys())
10454 raise errors.OpPrereqError("Missing block device(s): %s" %
10455 utils.CommaJoin(delta),
10456 errors.ECODE_INVAL)
10457 for dsk in self.disks:
10458 dsk[constants.IDISK_SIZE] = \
10459 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10461 # Verify instance specs
10462 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10463 ispec = {
10464 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10465 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10466 constants.ISPEC_DISK_COUNT: len(self.disks),
10467 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10468 for disk in self.disks],
10469 constants.ISPEC_NIC_COUNT: len(self.nics),
10470 constants.ISPEC_SPINDLE_USE: spindle_use,
10471 }
10473 group_info = self.cfg.GetNodeGroup(pnode.group)
10474 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10475 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10476 if not self.op.ignore_ipolicy and res:
10477 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10478 " policy: %s") % (pnode.group,
10479 utils.CommaJoin(res)),
10480 errors.ECODE_INVAL)
10482 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10484 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10485 # check OS parameters (remotely)
10486 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10488 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10490 # memory check on primary node
10491 #TODO(dynmem): use MINMEM for checking
10492 if self.op.start:
10493 _CheckNodeFreeMemory(self, self.pnode.name,
10494 "creating instance %s" % self.op.instance_name,
10495 self.be_full[constants.BE_MAXMEM],
10496 self.op.hypervisor)
10498 self.dry_run_result = list(nodenames)
10500 def Exec(self, feedback_fn):
10501 """Create and add the instance to the cluster.
10504 instance = self.op.instance_name
10505 pnode_name = self.pnode.name
10507 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10508 self.owned_locks(locking.LEVEL_NODE)), \
10509 "Node locks differ from node resource locks"
10510 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10512 ht_kind = self.op.hypervisor
10513 if ht_kind in constants.HTS_REQ_PORT:
10514 network_port = self.cfg.AllocatePort()
10516 network_port = None
10518 # This is ugly but we got a chicken-egg problem here
10519 # We can only take the group disk parameters, as the instance
10520 # has no disks yet (we are generating them right here).
10521 node = self.cfg.GetNodeInfo(pnode_name)
10522 nodegroup = self.cfg.GetNodeGroup(node.group)
10523 disks = _GenerateDiskTemplate(self,
10524 self.op.disk_template,
10525 instance, pnode_name,
10528 self.instance_file_storage_dir,
10529 self.op.file_driver,
10532 self.cfg.GetGroupDiskParams(nodegroup))
10534 iobj = objects.Instance(name=instance, os=self.op.os_type,
10535 primary_node=pnode_name,
10536 nics=self.nics, disks=disks,
10537 disk_template=self.op.disk_template,
10538 admin_state=constants.ADMINST_DOWN,
10539 network_port=network_port,
10540 beparams=self.op.beparams,
10541 hvparams=self.op.hvparams,
10542 hypervisor=self.op.hypervisor,
10543 osparams=self.op.osparams,
10544 )
10546 if self.op.tags:
10547 for tag in self.op.tags:
10548 iobj.AddTag(tag)
10550 if self.adopt_disks:
10551 if self.op.disk_template == constants.DT_PLAIN:
10552 # rename LVs to the newly-generated names; we need to construct
10553 # 'fake' LV disks with the old data, plus the new unique_id
10554 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10555 rename_to = []
10556 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10557 rename_to.append(t_dsk.logical_id)
10558 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10559 self.cfg.SetDiskID(t_dsk, pnode_name)
10560 result = self.rpc.call_blockdev_rename(pnode_name,
10561 zip(tmp_disks, rename_to))
10562 result.Raise("Failed to rename adoped LVs")
10564 feedback_fn("* creating instance disks...")
10565 try:
10566 _CreateDisks(self, iobj)
10567 except errors.OpExecError:
10568 self.LogWarning("Device creation failed, reverting...")
10569 try:
10570 _RemoveDisks(self, iobj)
10571 finally:
10572 self.cfg.ReleaseDRBDMinors(instance)
10573 raise
10575 feedback_fn("adding instance %s to cluster config" % instance)
10577 self.cfg.AddInstance(iobj, self.proc.GetECId())
10579 # Declare that we don't want to remove the instance lock anymore, as we've
10580 # added the instance to the config
10581 del self.remove_locks[locking.LEVEL_INSTANCE]
10583 if self.op.mode == constants.INSTANCE_IMPORT:
10584 # Release unused nodes
10585 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10586 else:
10587 # Release all nodes
10588 _ReleaseLocks(self, locking.LEVEL_NODE)
10590 disk_abort = False
10591 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10592 feedback_fn("* wiping instance disks...")
10593 try:
10594 _WipeDisks(self, iobj)
10595 except errors.OpExecError, err:
10596 logging.exception("Wiping disks failed")
10597 self.LogWarning("Wiping instance disks failed (%s)", err)
10598 disk_abort = True
10600 if disk_abort:
10601 # Something is already wrong with the disks, don't do anything else
10602 pass
10603 elif self.op.wait_for_sync:
10604 disk_abort = not _WaitForSync(self, iobj)
10605 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10606 # make sure the disks are not degraded (still sync-ing is ok)
10607 feedback_fn("* checking mirrors status")
10608 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10609 else:
10610 disk_abort = False
10612 if disk_abort:
10613 _RemoveDisks(self, iobj)
10614 self.cfg.RemoveInstance(iobj.name)
10615 # Make sure the instance lock gets removed
10616 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10617 raise errors.OpExecError("There are some degraded disks for"
10620 # Release all node resource locks
10621 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10623 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10624 # we need to set the disks ID to the primary node, since the
10625 # preceding code might or might have not done it, depending on
10626 # disk template and other options
10627 for disk in iobj.disks:
10628 self.cfg.SetDiskID(disk, pnode_name)
10629 if self.op.mode == constants.INSTANCE_CREATE:
10630 if not self.op.no_install:
10631 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10632 not self.op.wait_for_sync)
10634 feedback_fn("* pausing disk sync to install instance OS")
10635 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10636 (iobj.disks,
10637 iobj), True)
10638 for idx, success in enumerate(result.payload):
10640 logging.warn("pause-sync of instance %s for disk %d failed",
10643 feedback_fn("* running the instance OS create scripts...")
10644 # FIXME: pass debug option from opcode to backend
10645 os_add_result = \
10646 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10647 self.op.debug_level)
10649 feedback_fn("* resuming disk sync")
10650 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10651 (iobj.disks,
10652 iobj), False)
10653 for idx, success in enumerate(result.payload):
10655 logging.warn("resume-sync of instance %s for disk %d failed",
10658 os_add_result.Raise("Could not add os for instance %s"
10659 " on node %s" % (instance, pnode_name))
10662 if self.op.mode == constants.INSTANCE_IMPORT:
10663 feedback_fn("* running the instance OS import scripts...")
10665 transfers = []
10667 for idx, image in enumerate(self.src_images):
10668 if not image:
10669 continue
10671 # FIXME: pass debug option from opcode to backend
10672 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10673 constants.IEIO_FILE, (image, ),
10674 constants.IEIO_SCRIPT,
10675 (iobj.disks[idx], idx),
10676 None)
10677 transfers.append(dt)
10679 import_result = \
10680 masterd.instance.TransferInstanceData(self, feedback_fn,
10681 self.op.src_node, pnode_name,
10682 self.pnode.secondary_ip,
10683 iobj, transfers)
10684 if not compat.all(import_result):
10685 self.LogWarning("Some disks for instance %s on node %s were not"
10686 " imported successfully" % (instance, pnode_name))
10688 rename_from = self._old_instance_name
10690 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10691 feedback_fn("* preparing remote import...")
10692 # The source cluster will stop the instance before attempting to make
10693 # a connection. In some cases stopping an instance can take a long
10694 # time, hence the shutdown timeout is added to the connection
10696 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10697 self.op.source_shutdown_timeout)
10698 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10700 assert iobj.primary_node == self.pnode.name
10701 disk_results = \
10702 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10703 self.source_x509_ca,
10704 self._cds, timeouts)
10705 if not compat.all(disk_results):
10706 # TODO: Should the instance still be started, even if some disks
10707 # failed to import (valid for local imports, too)?
10708 self.LogWarning("Some disks for instance %s on node %s were not"
10709 " imported successfully" % (instance, pnode_name))
10711 rename_from = self.source_instance_name
10713 else:
10714 # also checked in the prereq part
10715 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10716 % self.op.mode)
10718 # Run rename script on newly imported instance
10719 assert iobj.name == instance
10720 feedback_fn("Running rename script for %s" % instance)
10721 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10722 rename_from,
10723 self.op.debug_level)
10724 if result.fail_msg:
10725 self.LogWarning("Failed to run rename script for %s on node"
10726 " %s: %s" % (instance, pnode_name, result.fail_msg))
10728 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10730 if self.op.start:
10731 iobj.admin_state = constants.ADMINST_UP
10732 self.cfg.Update(iobj, feedback_fn)
10733 logging.info("Starting instance %s on node %s", instance, pnode_name)
10734 feedback_fn("* starting instance...")
10735 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10736 False)
10737 result.Raise("Could not start instance")
10739 return list(iobj.all_nodes)
10742 class LUInstanceMultiAlloc(NoHooksLU):
10743 """Allocates multiple instances at the same time.
10748 def CheckArguments(self):
10749 """Check arguments.
10752 nodes = []
10753 for inst in self.op.instances:
10754 if inst.iallocator is not None:
10755 raise errors.OpPrereqError("iallocator are not allowed to be set on"
10756 " instance objects", errors.ECODE_INVAL)
10757 nodes.append(bool(inst.pnode))
10758 if inst.disk_template in constants.DTS_INT_MIRROR:
10759 nodes.append(bool(inst.snode))
10761 has_nodes = compat.any(nodes)
10762 if compat.all(nodes) ^ has_nodes:
10763 raise errors.OpPrereqError("There are instance objects providing"
10764 " pnode/snode while others do not",
10765 errors.ECODE_INVAL)
10767 if self.op.iallocator is None:
10768 default_iallocator = self.cfg.GetDefaultIAllocator()
10769 if default_iallocator and has_nodes:
10770 self.op.iallocator = default_iallocator
10772 raise errors.OpPrereqError("No iallocator or nodes on the instances"
10773 " given and no cluster-wide default"
10774 " iallocator found; please specify either"
10775 " an iallocator or nodes on the instances"
10776 " or set a cluster-wide default iallocator",
10777 errors.ECODE_INVAL)
10779 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10781 raise errors.OpPrereqError("There are duplicate instance names: %s" %
10782 utils.CommaJoin(dups), errors.ECODE_INVAL)
10784 def ExpandNames(self):
10785 """Calculate the locks.
10788 self.share_locks = _ShareAll()
10789 self.needed_locks = {}
10791 if self.op.iallocator:
10792 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10793 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10794 else:
10795 nodeslist = []
10796 for inst in self.op.instances:
10797 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10798 nodeslist.append(inst.pnode)
10799 if inst.snode is not None:
10800 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10801 nodeslist.append(inst.snode)
10803 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10804 # Lock resources of instance's primary and secondary nodes (copy to
10805 # prevent accidental modification)
10806 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10808 def CheckPrereq(self):
10809 """Check prerequisite.
10812 cluster = self.cfg.GetClusterInfo()
10813 default_vg = self.cfg.GetVGName()
10814 ec_id = self.proc.GetECId()
10816 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10817 _ComputeNics(op, cluster, None,
10818 self.cfg, ec_id),
10819 _ComputeFullBeParams(op, cluster))
10820 for op in self.op.instances]
10822 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10823 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10825 ial.Run(self.op.iallocator)
10827 if not ial.success:
10828 raise errors.OpPrereqError("Can't compute nodes using"
10829 " iallocator '%s': %s" %
10830 (self.op.iallocator, ial.info),
10831 errors.ECODE_NORES)
10833 self.ia_result = ial.result
10835 if self.op.dry_run:
10836 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
10837 constants.JOB_IDS_KEY: [],
10838 })
10840 def _ConstructPartialResult(self):
10841 """Contructs the partial result.
10844 (allocatable, failed) = self.ia_result
10845 return {
10846 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10847 map(compat.fst, allocatable),
10848 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10849 }
10851 def Exec(self, feedback_fn):
10852 """Executes the opcode.
10855 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10856 (allocatable, failed) = self.ia_result
10858 jobs = []
10859 for (name, nodes) in allocatable:
10860 op = op2inst.pop(name)
10862 if len(nodes) > 1:
10863 (op.pnode, op.snode) = nodes
10864 else:
10865 (op.pnode,) = nodes
10867 jobs.append([op])
10869 missing = set(op2inst.keys()) - set(failed)
10870 assert not missing, \
10871 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
10873 return ResultWithJobs(jobs, **self._ConstructPartialResult())
10876 def _CheckRADOSFreeSpace():
10877 """Compute disk size requirements inside the RADOS cluster.
10880 # For the RADOS cluster we assume there is always enough space.
10881 pass
10884 class LUInstanceConsole(NoHooksLU):
10885 """Connect to an instance's console.
10887 This is somewhat special in that it returns the command line that
10888 you need to run on the master node in order to connect to the
10894 def ExpandNames(self):
10895 self.share_locks = _ShareAll()
10896 self._ExpandAndLockInstance()
10898 def CheckPrereq(self):
10899 """Check prerequisites.
10901 This checks that the instance is in the cluster.
10904 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10905 assert self.instance is not None, \
10906 "Cannot retrieve locked instance %s" % self.op.instance_name
10907 _CheckNodeOnline(self, self.instance.primary_node)
10909 def Exec(self, feedback_fn):
10910 """Connect to the console of an instance
10913 instance = self.instance
10914 node = instance.primary_node
10916 node_insts = self.rpc.call_instance_list([node],
10917 [instance.hypervisor])[node]
10918 node_insts.Raise("Can't get node information from %s" % node)
10920 if instance.name not in node_insts.payload:
10921 if instance.admin_state == constants.ADMINST_UP:
10922 state = constants.INSTST_ERRORDOWN
10923 elif instance.admin_state == constants.ADMINST_DOWN:
10924 state = constants.INSTST_ADMINDOWN
10926 state = constants.INSTST_ADMINOFFLINE
10927 raise errors.OpExecError("Instance %s is not running (state %s)" %
10928 (instance.name, state))
10930 logging.debug("Connecting to console of %s on %s", instance.name, node)
10932 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10935 def _GetInstanceConsole(cluster, instance):
10936 """Returns console information for an instance.
10938 @type cluster: L{objects.Cluster}
10939 @type instance: L{objects.Instance}
10943 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10944 # beparams and hvparams are passed separately, to avoid editing the
10945 # instance and then saving the defaults in the instance itself.
10946 hvparams = cluster.FillHV(instance)
10947 beparams = cluster.FillBE(instance)
10948 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10950 assert console.instance == instance.name
10951 assert console.Validate()
10953 return console.ToDict()
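# Usage sketch: LUInstanceConsole.Exec above calls this for a running
# instance; the returned dict is the serialized, hypervisor-specific console
# object (for example a command line to attach to a serial console) which
# the client then runs on the master node.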
10956 class LUInstanceReplaceDisks(LogicalUnit):
10957 """Replace the disks of an instance.
10960 HPATH = "mirrors-replace"
10961 HTYPE = constants.HTYPE_INSTANCE
10964 def CheckArguments(self):
10965 """Check arguments.
10968 remote_node = self.op.remote_node
10969 ialloc = self.op.iallocator
10970 if self.op.mode == constants.REPLACE_DISK_CHG:
10971 if remote_node is None and ialloc is None:
10972 raise errors.OpPrereqError("When changing the secondary either an"
10973 " iallocator script must be used or the"
10974 " new node given", errors.ECODE_INVAL)
10976 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10978 elif remote_node is not None or ialloc is not None:
10979 # Not replacing the secondary
10980 raise errors.OpPrereqError("The iallocator and new node options can"
10981 " only be used when changing the"
10982 " secondary node", errors.ECODE_INVAL)
10984 def ExpandNames(self):
10985 self._ExpandAndLockInstance()
10987 assert locking.LEVEL_NODE not in self.needed_locks
10988 assert locking.LEVEL_NODE_RES not in self.needed_locks
10989 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10991 assert self.op.iallocator is None or self.op.remote_node is None, \
10992 "Conflicting options"
10994 if self.op.remote_node is not None:
10995 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10997 # Warning: do not remove the locking of the new secondary here
10998 # unless DRBD8.AddChildren is changed to work in parallel;
10999 # currently it doesn't since parallel invocations of
11000 # FindUnusedMinor will conflict
11001 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11002 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11004 self.needed_locks[locking.LEVEL_NODE] = []
11005 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11007 if self.op.iallocator is not None:
11008 # iallocator will select a new node in the same group
11009 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11010 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11012 self.needed_locks[locking.LEVEL_NODE_RES] = []
11014 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11015 self.op.iallocator, self.op.remote_node,
11016 self.op.disks, self.op.early_release,
11017 self.op.ignore_ipolicy)
11019 self.tasklets = [self.replacer]
11021 def DeclareLocks(self, level):
11022 if level == locking.LEVEL_NODEGROUP:
11023 assert self.op.remote_node is None
11024 assert self.op.iallocator is not None
11025 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11027 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11028 # Lock all groups used by instance optimistically; this requires going
11029 # via the node before it's locked, requiring verification later on
11030 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11031 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11033 elif level == locking.LEVEL_NODE:
11034 if self.op.iallocator is not None:
11035 assert self.op.remote_node is None
11036 assert not self.needed_locks[locking.LEVEL_NODE]
11037 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11039 # Lock member nodes of all locked groups
11040 self.needed_locks[locking.LEVEL_NODE] = \
11041 [node_name
11042 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11043 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11044 else:
11045 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11047 self._LockInstancesNodes()
11049 elif level == locking.LEVEL_NODE_RES:
11051 self.needed_locks[locking.LEVEL_NODE_RES] = \
11052 self.needed_locks[locking.LEVEL_NODE]
11054 def BuildHooksEnv(self):
11055 """Build hooks env.
11057 This runs on the master, the primary and all the secondaries.
11059 """
11060 instance = self.replacer.instance
11061 env = {
11062 "MODE": self.op.mode,
11063 "NEW_SECONDARY": self.op.remote_node,
11064 "OLD_SECONDARY": instance.secondary_nodes[0],
11066 env.update(_BuildInstanceHookEnvByObject(self, instance))
11069 def BuildHooksNodes(self):
11070 """Build hooks nodes.
11073 instance = self.replacer.instance
11075 self.cfg.GetMasterNode(),
11076 instance.primary_node,
11077 ]
11078 if self.op.remote_node is not None:
11079 nl.append(self.op.remote_node)
11081 return nl, nl
11082 def CheckPrereq(self):
11083 """Check prerequisites.
11086 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11087 self.op.iallocator is None)
11089 # Verify if node group locks are still correct
11090 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11091 if owned_groups:
11092 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11094 return LogicalUnit.CheckPrereq(self)
11097 class TLReplaceDisks(Tasklet):
11098 """Replaces disks for an instance.
11100 Note: Locking is not within the scope of this class.
11102 """
11103 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11104 disks, early_release, ignore_ipolicy):
11105 """Initializes this class.
11108 Tasklet.__init__(self, lu)
11111 self.instance_name = instance_name
11113 self.iallocator_name = iallocator_name
11114 self.remote_node = remote_node
11115 self.disks = disks
11116 self.early_release = early_release
11117 self.ignore_ipolicy = ignore_ipolicy
11120 self.instance = None
11121 self.new_node = None
11122 self.target_node = None
11123 self.other_node = None
11124 self.remote_node_info = None
11125 self.node_secondary_ip = None
11127 @staticmethod
11128 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11129 """Compute a new secondary node using an IAllocator.
11132 req = iallocator.IAReqRelocate(name=instance_name,
11133 relocate_from=list(relocate_from))
11134 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11136 ial.Run(iallocator_name)
11138 if not ial.success:
11139 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11140 " %s" % (iallocator_name, ial.info),
11141 errors.ECODE_NORES)
11143 remote_node_name = ial.result[0]
11145 lu.LogInfo("Selected new secondary for instance '%s': %s",
11146 instance_name, remote_node_name)
11148 return remote_node_name
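# Illustrative example (not from the original source): relocating the
# secondary of instance "inst1" away from node "node1" amounts to building
#   req = iallocator.IAReqRelocate(name="inst1", relocate_from=["node1"])
# and, on success, ial.result is a single-element list such as ["node3"],
# which is what this helper returns via ial.result[0].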
11150 def _FindFaultyDisks(self, node_name):
11151 """Wrapper for L{_FindFaultyInstanceDisks}.
11154 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11157 def _CheckDisksActivated(self, instance):
11158 """Checks if the instance disks are activated.
11160 @param instance: The instance to check disks
11161 @return: True if they are activated, False otherwise
11163 """
11164 nodes = instance.all_nodes
11166 for idx, dev in enumerate(instance.disks):
11168 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11169 self.cfg.SetDiskID(dev, node)
11171 result = _BlockdevFind(self, node, dev, instance)
11173 if result.offline:
11174 continue
11175 elif result.fail_msg or not result.payload:
11176 return False
11178 return True
11180 def CheckPrereq(self):
11181 """Check prerequisites.
11183 This checks that the instance is in the cluster.
11185 """
11186 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11187 assert instance is not None, \
11188 "Cannot retrieve locked instance %s" % self.instance_name
11190 if instance.disk_template != constants.DT_DRBD8:
11191 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11192 " instances", errors.ECODE_INVAL)
11194 if len(instance.secondary_nodes) != 1:
11195 raise errors.OpPrereqError("The instance has a strange layout,"
11196 " expected one secondary but found %d" %
11197 len(instance.secondary_nodes),
11198 errors.ECODE_FAULT)
11200 instance = self.instance
11201 secondary_node = instance.secondary_nodes[0]
11203 if self.iallocator_name is None:
11204 remote_node = self.remote_node
11205 else:
11206 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11207 instance.name, instance.secondary_nodes)
11209 if remote_node is None:
11210 self.remote_node_info = None
11211 else:
11212 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11213 "Remote node '%s' is not locked" % remote_node
11215 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11216 assert self.remote_node_info is not None, \
11217 "Cannot retrieve locked node %s" % remote_node
11219 if remote_node == self.instance.primary_node:
11220 raise errors.OpPrereqError("The specified node is the primary node of"
11221 " the instance", errors.ECODE_INVAL)
11223 if remote_node == secondary_node:
11224 raise errors.OpPrereqError("The specified node is already the"
11225 " secondary node of the instance",
11226 errors.ECODE_INVAL)
11228 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11229 constants.REPLACE_DISK_CHG):
11230 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11231 errors.ECODE_INVAL)
11233 if self.mode == constants.REPLACE_DISK_AUTO:
11234 if not self._CheckDisksActivated(instance):
11235 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11236 " first" % self.instance_name,
11237 errors.ECODE_STATE)
11238 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11239 faulty_secondary = self._FindFaultyDisks(secondary_node)
11241 if faulty_primary and faulty_secondary:
11242 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11243 " one node and can not be repaired"
11244 " automatically" % self.instance_name,
11245 errors.ECODE_STATE)
11247 if faulty_primary:
11248 self.disks = faulty_primary
11249 self.target_node = instance.primary_node
11250 self.other_node = secondary_node
11251 check_nodes = [self.target_node, self.other_node]
11252 elif faulty_secondary:
11253 self.disks = faulty_secondary
11254 self.target_node = secondary_node
11255 self.other_node = instance.primary_node
11256 check_nodes = [self.target_node, self.other_node]
11257 else:
11258 self.disks = []
11259 check_nodes = []
11261 else:
11262 # Non-automatic modes
11263 if self.mode == constants.REPLACE_DISK_PRI:
11264 self.target_node = instance.primary_node
11265 self.other_node = secondary_node
11266 check_nodes = [self.target_node, self.other_node]
11268 elif self.mode == constants.REPLACE_DISK_SEC:
11269 self.target_node = secondary_node
11270 self.other_node = instance.primary_node
11271 check_nodes = [self.target_node, self.other_node]
11273 elif self.mode == constants.REPLACE_DISK_CHG:
11274 self.new_node = remote_node
11275 self.other_node = instance.primary_node
11276 self.target_node = secondary_node
11277 check_nodes = [self.new_node, self.other_node]
11279 _CheckNodeNotDrained(self.lu, remote_node)
11280 _CheckNodeVmCapable(self.lu, remote_node)
11282 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11283 assert old_node_info is not None
11284 if old_node_info.offline and not self.early_release:
11285 # doesn't make sense to delay the release
11286 self.early_release = True
11287 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11288 " early-release mode", secondary_node)
11291 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11294 # If not specified all disks should be replaced
11295 if not self.disks:
11296 self.disks = range(len(self.instance.disks))
11298 # TODO: This is ugly, but right now we can't distinguish between internal
11299 # submitted opcode and external one. We should fix that.
11300 if self.remote_node_info:
11301 # We change the node, lets verify it still meets instance policy
11302 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11303 cluster = self.cfg.GetClusterInfo()
11304 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11305 new_group_info)
11306 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11307 ignore=self.ignore_ipolicy)
11309 for node in check_nodes:
11310 _CheckNodeOnline(self.lu, node)
11312 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11313 self.other_node,
11314 self.target_node]
11315 if node_name is not None)
11317 # Release unneeded node and node resource locks
11318 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11319 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11320 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11322 # Release any owned node group
11323 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11325 # Check whether disks are valid
11326 for disk_idx in self.disks:
11327 instance.FindDisk(disk_idx)
11329 # Get secondary node IP addresses
11330 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11331 in self.cfg.GetMultiNodeInfo(touched_nodes))
11333 def Exec(self, feedback_fn):
11334 """Execute disk replacement.
11336 This dispatches the disk replacement to the appropriate handler.
11338 """
11340 # Verify owned locks before starting operation
11341 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11342 assert set(owned_nodes) == set(self.node_secondary_ip), \
11343 ("Incorrect node locks, owning %s, expected %s" %
11344 (owned_nodes, self.node_secondary_ip.keys()))
11345 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11346 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11347 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11349 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11350 assert list(owned_instances) == [self.instance_name], \
11351 "Instance '%s' not locked" % self.instance_name
11353 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11354 "Should not own any node group lock at this point"
11357 feedback_fn("No disks need replacement for instance '%s'" %
11358 self.instance.name)
11361 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11362 (utils.CommaJoin(self.disks), self.instance.name))
11363 feedback_fn("Current primary node: %s", self.instance.primary_node)
11364 feedback_fn("Current seconary node: %s",
11365 utils.CommaJoin(self.instance.secondary_nodes))
11367 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11369 # Activate the instance disks if we're replacing them on a down instance
11370 if activate_disks:
11371 _StartInstanceDisks(self.lu, self.instance, True)
11373 try:
11374 # Should we replace the secondary node?
11375 if self.new_node is not None:
11376 fn = self._ExecDrbd8Secondary
11377 else:
11378 fn = self._ExecDrbd8DiskOnly
11380 result = fn(feedback_fn)
11381 finally:
11382 # Deactivate the instance disks if we're replacing them on a
11383 # down instance
11384 if activate_disks:
11385 _SafeShutdownInstanceDisks(self.lu, self.instance)
11387 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11389 if __debug__:
11390 # Verify owned locks
11391 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11392 nodes = frozenset(self.node_secondary_ip)
11393 assert ((self.early_release and not owned_nodes) or
11394 (not self.early_release and not (set(owned_nodes) - nodes))), \
11395 ("Not owning the correct locks, early_release=%s, owned=%r,"
11396 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11400 def _CheckVolumeGroup(self, nodes):
11401 self.lu.LogInfo("Checking volume groups")
11403 vgname = self.cfg.GetVGName()
11405 # Make sure volume group exists on all involved nodes
11406 results = self.rpc.call_vg_list(nodes)
11408 raise errors.OpExecError("Can't list volume groups on the nodes")
11411 res = results[node]
11412 res.Raise("Error checking node %s" % node)
11413 if vgname not in res.payload:
11414 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11417 def _CheckDisksExistence(self, nodes):
11418 # Check disk existence
11419 for idx, dev in enumerate(self.instance.disks):
11420 if idx not in self.disks:
11424 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11425 self.cfg.SetDiskID(dev, node)
11427 result = _BlockdevFind(self, node, dev, self.instance)
11429 msg = result.fail_msg
11430 if msg or not result.payload:
11432 msg = "disk not found"
11433 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11436 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11437 for idx, dev in enumerate(self.instance.disks):
11438 if idx not in self.disks:
11441 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11444 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11445 on_primary, ldisk=ldisk):
11446 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11447 " replace disks for instance %s" %
11448 (node_name, self.instance.name))
11450 def _CreateNewStorage(self, node_name):
11451 """Create new storage on the primary or secondary node.
11453 This is only used for same-node replaces, not for changing the
11454 secondary node, hence we don't want to modify the existing disk.
11456 """
11457 iv_names = {}
11459 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11460 for idx, dev in enumerate(disks):
11461 if idx not in self.disks:
11464 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11466 self.cfg.SetDiskID(dev, node_name)
11468 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11469 names = _GenerateUniqueNames(self.lu, lv_names)
11471 (data_disk, meta_disk) = dev.children
11472 vg_data = data_disk.logical_id[0]
11473 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11474 logical_id=(vg_data, names[0]),
11475 params=data_disk.params)
11476 vg_meta = meta_disk.logical_id[0]
11477 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11478 size=constants.DRBD_META_SIZE,
11479 logical_id=(vg_meta, names[1]),
11480 params=meta_disk.params)
11482 new_lvs = [lv_data, lv_meta]
11483 old_lvs = [child.Copy() for child in dev.children]
11484 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11486 # we pass force_create=True to force the LVM creation
11487 for new_lv in new_lvs:
11488 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11489 _GetInstanceInfoText(self.instance), False)
11491 return iv_names
11493 def _CheckDevices(self, node_name, iv_names):
11494 for name, (dev, _, _) in iv_names.iteritems():
11495 self.cfg.SetDiskID(dev, node_name)
11497 result = _BlockdevFind(self, node_name, dev, self.instance)
11499 msg = result.fail_msg
11500 if msg or not result.payload:
11502 msg = "disk not found"
11503 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11506 if result.payload.is_degraded:
11507 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11509 def _RemoveOldStorage(self, node_name, iv_names):
11510 for name, (_, old_lvs, _) in iv_names.iteritems():
11511 self.lu.LogInfo("Remove logical volumes for %s", name)
11513 for lv in old_lvs:
11514 self.cfg.SetDiskID(lv, node_name)
11516 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11518 self.lu.LogWarning("Can't remove old LV: %s", msg,
11519 hint="remove unused LVs manually")
11521 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11522 """Replace a disk on the primary or secondary for DRBD 8.
11524 The algorithm for replace is quite complicated:
11526 1. for each disk to be replaced:
11528 1. create new LVs on the target node with unique names
11529 1. detach old LVs from the drbd device
11530 1. rename old LVs to name_replaced.<time_t>
11531 1. rename new LVs to old LVs
11532 1. attach the new LVs (with the old names now) to the drbd device
11534 1. wait for sync across all devices
11536 1. for each modified disk:
11538 1. remove old LVs (which have the name name_replaced.<time_t>)
11540 Failures are not very well handled.
11542 """
11544 steps_total = 6
11545 # Step: check device activation
11546 self.lu.LogStep(1, steps_total, "Check device existence")
11547 self._CheckDisksExistence([self.other_node, self.target_node])
11548 self._CheckVolumeGroup([self.target_node, self.other_node])
11550 # Step: check other node consistency
11551 self.lu.LogStep(2, steps_total, "Check peer consistency")
11552 self._CheckDisksConsistency(self.other_node,
11553 self.other_node == self.instance.primary_node,
11554 False)
11556 # Step: create new storage
11557 self.lu.LogStep(3, steps_total, "Allocate new storage")
11558 iv_names = self._CreateNewStorage(self.target_node)
11560 # Step: for each lv, detach+rename*2+attach
11561 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11562 for dev, old_lvs, new_lvs in iv_names.itervalues():
11563 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11565 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11566 old_lvs)
11567 result.Raise("Can't detach drbd from local storage on node"
11568 " %s for device %s" % (self.target_node, dev.iv_name))
11570 #cfg.Update(instance)
11572 # ok, we created the new LVs, so now we know we have the needed
11573 # storage; as such, we proceed on the target node to rename
11574 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11575 # using the assumption that logical_id == physical_id (which in
11576 # turn is the unique_id on that node)
11578 # FIXME(iustin): use a better name for the replaced LVs
11579 temp_suffix = int(time.time())
11580 ren_fn = lambda d, suff: (d.physical_id[0],
11581 d.physical_id[1] + "_replaced-%s" % suff)
11583 # Build the rename list based on what LVs exist on the node
11584 rename_old_to_new = []
11585 for to_ren in old_lvs:
11586 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11587 if not result.fail_msg and result.payload:
11589 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11591 self.lu.LogInfo("Renaming the old LVs on the target node")
11592 result = self.rpc.call_blockdev_rename(self.target_node,
11593 rename_old_to_new)
11594 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11596 # Now we rename the new LVs to the old LVs
11597 self.lu.LogInfo("Renaming the new LVs on the target node")
11598 rename_new_to_old = [(new, old.physical_id)
11599 for old, new in zip(old_lvs, new_lvs)]
11600 result = self.rpc.call_blockdev_rename(self.target_node,
11601 rename_new_to_old)
11602 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11604 # Intermediate steps of in memory modifications
11605 for old, new in zip(old_lvs, new_lvs):
11606 new.logical_id = old.logical_id
11607 self.cfg.SetDiskID(new, self.target_node)
11609 # We need to modify old_lvs so that removal later removes the
11610 # right LVs, not the newly added ones; note that old_lvs is a
11611 # copy here
11612 for disk in old_lvs:
11613 disk.logical_id = ren_fn(disk, temp_suffix)
11614 self.cfg.SetDiskID(disk, self.target_node)
11616 # Now that the new lvs have the old name, we can add them to the device
11617 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
11618 result = self.rpc.call_blockdev_addchildren(self.target_node,
11619 (dev, self.instance), new_lvs)
11620 msg = result.fail_msg
11621 if msg:
11622 for new_lv in new_lvs:
11623 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11624 new_lv).fail_msg
11625 if msg2:
11626 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11627 hint=("cleanup manually the unused logical"
11629 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11631 cstep = itertools.count(5)
11633 if self.early_release:
11634 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11635 self._RemoveOldStorage(self.target_node, iv_names)
11636 # TODO: Check if releasing locks early still makes sense
11637 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11638 else:
11639 # Release all resource locks except those used by the instance
11640 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11641 keep=self.node_secondary_ip.keys())
11643 # Release all node locks while waiting for sync
11644 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11646 # TODO: Can the instance lock be downgraded here? Take the optional disk
11647 # shutdown in the caller into consideration.
11650 # This can fail as the old devices are degraded and _WaitForSync
11651 # does a combined result over all disks, so we don't check its return value
11652 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11653 _WaitForSync(self.lu, self.instance)
11655 # Check all devices manually
11656 self._CheckDevices(self.instance.primary_node, iv_names)
11658 # Step: remove old storage
11659 if not self.early_release:
11660 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11661 self._RemoveOldStorage(self.target_node, iv_names)
11663 def _ExecDrbd8Secondary(self, feedback_fn):
11664 """Replace the secondary node for DRBD 8.
11666 The algorithm for replace is quite complicated:
11667 - for all disks of the instance:
11668 - create new LVs on the new node with same names
11669 - shutdown the drbd device on the old secondary
11670 - disconnect the drbd network on the primary
11671 - create the drbd device on the new secondary
11672 - network attach the drbd on the primary, using an artifice:
11673 the drbd code for Attach() will connect to the network if it
11674 finds a device which is connected to the good local disks but
11675 not network enabled
11676 - wait for sync across all devices
11677 - remove all disks from the old secondary
11679 Failures are not very well handled.
11681 """
11682 steps_total = 6
11684 pnode = self.instance.primary_node
11686 # Step: check device activation
11687 self.lu.LogStep(1, steps_total, "Check device existence")
11688 self._CheckDisksExistence([self.instance.primary_node])
11689 self._CheckVolumeGroup([self.instance.primary_node])
11691 # Step: check other node consistency
11692 self.lu.LogStep(2, steps_total, "Check peer consistency")
11693 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11695 # Step: create new storage
11696 self.lu.LogStep(3, steps_total, "Allocate new storage")
11697 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11698 for idx, dev in enumerate(disks):
11699 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11700 (self.new_node, idx))
11701 # we pass force_create=True to force LVM creation
11702 for new_lv in dev.children:
11703 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11704 True, _GetInstanceInfoText(self.instance), False)
11706 # Step 4: drbd minors and drbd setup changes
11707 # after this, we must manually remove the drbd minors on both the
11708 # error and the success paths
11709 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11710 minors = self.cfg.AllocateDRBDMinor([self.new_node
11711 for dev in self.instance.disks],
11712 self.instance.name)
11713 logging.debug("Allocated minors %r", minors)
11715 iv_names = {}
11716 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11717 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11718 (self.new_node, idx))
11719 # create new devices on new_node; note that we create two IDs:
11720 # one without port, so the drbd will be activated without
11721 # networking information on the new node at this stage, and one
11722 # with network, for the latter activation in step 4
11723 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11724 if self.instance.primary_node == o_node1:
11725 p_minor = o_minor1
11726 else:
11727 assert self.instance.primary_node == o_node2, "Three-node instance?"
11728 p_minor = o_minor2
11730 new_alone_id = (self.instance.primary_node, self.new_node, None,
11731 p_minor, new_minor, o_secret)
11732 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11733 p_minor, new_minor, o_secret)
11735 iv_names[idx] = (dev, dev.children, new_net_id)
11736 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11738 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11739 logical_id=new_alone_id,
11740 children=dev.children,
11741 size=dev.size,
11742 params={})
11743 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11744 self.cfg)
11745 try:
11746 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11747 anno_new_drbd,
11748 _GetInstanceInfoText(self.instance), False)
11749 except errors.GenericError:
11750 self.cfg.ReleaseDRBDMinors(self.instance.name)
11751 raise
11753 # We have new devices, shutdown the drbd on the old secondary
11754 for idx, dev in enumerate(self.instance.disks):
11755 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
11756 self.cfg.SetDiskID(dev, self.target_node)
11757 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11758 (dev, self.instance)).fail_msg
11760 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11761 "node: %s" % (idx, msg),
11762 hint=("Please cleanup this device manually as"
11763 " soon as possible"))
11765 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11766 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11767 self.instance.disks)[pnode]
11769 msg = result.fail_msg
11770 if msg:
11771 # detaches didn't succeed (unlikely)
11772 self.cfg.ReleaseDRBDMinors(self.instance.name)
11773 raise errors.OpExecError("Can't detach the disks from the network on"
11774 " old node: %s" % (msg,))
11776 # if we managed to detach at least one, we update all the disks of
11777 # the instance to point to the new secondary
11778 self.lu.LogInfo("Updating instance configuration")
11779 for dev, _, new_logical_id in iv_names.itervalues():
11780 dev.logical_id = new_logical_id
11781 self.cfg.SetDiskID(dev, self.instance.primary_node)
11783 self.cfg.Update(self.instance, feedback_fn)
11785 # Release all node locks (the configuration has been updated)
11786 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11788 # and now perform the drbd attach
11789 self.lu.LogInfo("Attaching primary drbds to new secondary"
11790 " (standalone => connected)")
11791 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11792 self.new_node],
11793 self.node_secondary_ip,
11794 (self.instance.disks, self.instance),
11795 self.instance.name,
11796 False)
11797 for to_node, to_result in result.items():
11798 msg = to_result.fail_msg
11800 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11802 hint=("please do a gnt-instance info to see the"
11803 " status of disks"))
11805 cstep = itertools.count(5)
11807 if self.early_release:
11808 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11809 self._RemoveOldStorage(self.target_node, iv_names)
11810 # TODO: Check if releasing locks early still makes sense
11811 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11812 else:
11813 # Release all resource locks except those used by the instance
11814 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11815 keep=self.node_secondary_ip.keys())
11817 # TODO: Can the instance lock be downgraded here? Take the optional disk
11818 # shutdown in the caller into consideration.
11821 # This can fail as the old devices are degraded and _WaitForSync
11822 # does a combined result over all disks, so we don't check its return value
11823 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11824 _WaitForSync(self.lu, self.instance)
11826 # Check all devices manually
11827 self._CheckDevices(self.instance.primary_node, iv_names)
11829 # Step: remove old storage
11830 if not self.early_release:
11831 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11832 self._RemoveOldStorage(self.target_node, iv_names)
11835 class LURepairNodeStorage(NoHooksLU):
11836 """Repairs the volume group on a node.
11841 def CheckArguments(self):
11842 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11844 storage_type = self.op.storage_type
11846 if (constants.SO_FIX_CONSISTENCY not in
11847 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11848 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11849 " repaired" % storage_type,
11850 errors.ECODE_INVAL)
11852 def ExpandNames(self):
11853 self.needed_locks = {
11854 locking.LEVEL_NODE: [self.op.node_name],
11855 }
11857 def _CheckFaultyDisks(self, instance, node_name):
11858 """Ensure faulty disks abort the opcode or at least warn."""
11859 try:
11860 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11861 node_name, True):
11862 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11863 " node '%s'" % (instance.name, node_name),
11864 errors.ECODE_STATE)
11865 except errors.OpPrereqError, err:
11866 if self.op.ignore_consistency:
11867 self.LogWarning(str(err.args[0]))
11868 else:
11869 raise
11871 def CheckPrereq(self):
11872 """Check prerequisites.
11875 # Check whether any instance on this node has faulty disks
11876 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11877 if inst.admin_state != constants.ADMINST_UP:
11878 continue
11879 check_nodes = set(inst.all_nodes)
11880 check_nodes.discard(self.op.node_name)
11881 for inst_node_name in check_nodes:
11882 self._CheckFaultyDisks(inst, inst_node_name)
11884 def Exec(self, feedback_fn):
11885 feedback_fn("Repairing storage unit '%s' on %s ..." %
11886 (self.op.name, self.op.node_name))
11888 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11889 result = self.rpc.call_storage_execute(self.op.node_name,
11890 self.op.storage_type, st_args,
11891 self.op.name,
11892 constants.SO_FIX_CONSISTENCY)
11893 result.Raise("Failed to repair storage unit '%s' on %s" %
11894 (self.op.name, self.op.node_name))
11897 class LUNodeEvacuate(NoHooksLU):
11898 """Evacuates instances off a list of nodes.
11903 _MODE2IALLOCATOR = {
11904 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11905 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11906 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11907 }
11908 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11909 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11910 constants.IALLOCATOR_NEVAC_MODES)
11912 def CheckArguments(self):
11913 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11915 def ExpandNames(self):
11916 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11918 if self.op.remote_node is not None:
11919 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11920 assert self.op.remote_node
11922 if self.op.remote_node == self.op.node_name:
11923 raise errors.OpPrereqError("Can not use evacuated node as a new"
11924 " secondary node", errors.ECODE_INVAL)
11926 if self.op.mode != constants.NODE_EVAC_SEC:
11927 raise errors.OpPrereqError("Without the use of an iallocator only"
11928 " secondary instances can be evacuated",
11929 errors.ECODE_INVAL)
11932 self.share_locks = _ShareAll()
11933 self.needed_locks = {
11934 locking.LEVEL_INSTANCE: [],
11935 locking.LEVEL_NODEGROUP: [],
11936 locking.LEVEL_NODE: [],
11937 }
11939 # Determine nodes (via group) optimistically, needs verification once locks
11940 # have been acquired
11941 self.lock_nodes = self._DetermineNodes()
11943 def _DetermineNodes(self):
11944 """Gets the list of nodes to operate on.
11947 if self.op.remote_node is None:
11948 # Iallocator will choose any node(s) in the same group
11949 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11950 else:
11951 group_nodes = frozenset([self.op.remote_node])
11953 # Determine nodes to be locked
11954 return set([self.op.node_name]) | group_nodes
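# Illustrative example (assumed topology, not from the original source): when
# evacuating "node2" without an explicit remote node, and "node2" shares its
# group with "node1" and "node3", this returns
#   set(["node2"]) | frozenset(["node1", "node3"])
# i.e. the evacuated node plus every candidate target node in its group.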
11956 def _DetermineInstances(self):
11957 """Builds list of instances to operate on.
11960 assert self.op.mode in constants.NODE_EVAC_MODES
11962 if self.op.mode == constants.NODE_EVAC_PRI:
11963 # Primary instances only
11964 inst_fn = _GetNodePrimaryInstances
11965 assert self.op.remote_node is None, \
11966 "Evacuating primary instances requires iallocator"
11967 elif self.op.mode == constants.NODE_EVAC_SEC:
11968 # Secondary instances only
11969 inst_fn = _GetNodeSecondaryInstances
11970 else:
11971 # All instances
11972 assert self.op.mode == constants.NODE_EVAC_ALL
11973 inst_fn = _GetNodeInstances
11974 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11976 raise errors.OpPrereqError("Due to an issue with the iallocator"
11977 " interface it is not possible to evacuate"
11978 " all instances at once; specify explicitly"
11979 " whether to evacuate primary or secondary"
11981 errors.ECODE_INVAL)
11983 return inst_fn(self.cfg, self.op.node_name)
11985 def DeclareLocks(self, level):
11986 if level == locking.LEVEL_INSTANCE:
11987 # Lock instances optimistically, needs verification once node and group
11988 # locks have been acquired
11989 self.needed_locks[locking.LEVEL_INSTANCE] = \
11990 set(i.name for i in self._DetermineInstances())
11992 elif level == locking.LEVEL_NODEGROUP:
11993 # Lock node groups for all potential target nodes optimistically, needs
11994 # verification once nodes have been acquired
11995 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11996 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11998 elif level == locking.LEVEL_NODE:
11999 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12001 def CheckPrereq(self):
12003 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12004 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12005 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12007 need_nodes = self._DetermineNodes()
12009 if not owned_nodes.issuperset(need_nodes):
12010 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12011 " locks were acquired, current nodes are"
12012 " are '%s', used to be '%s'; retry the"
12014 (self.op.node_name,
12015 utils.CommaJoin(need_nodes),
12016 utils.CommaJoin(owned_nodes)),
12017 errors.ECODE_STATE)
12019 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12020 if owned_groups != wanted_groups:
12021 raise errors.OpExecError("Node groups changed since locks were acquired,"
12022 " current groups are '%s', used to be '%s';"
12023 " retry the operation" %
12024 (utils.CommaJoin(wanted_groups),
12025 utils.CommaJoin(owned_groups)))
12027 # Determine affected instances
12028 self.instances = self._DetermineInstances()
12029 self.instance_names = [i.name for i in self.instances]
12031 if set(self.instance_names) != owned_instances:
12032 raise errors.OpExecError("Instances on node '%s' changed since locks"
12033 " were acquired, current instances are '%s',"
12034 " used to be '%s'; retry the operation" %
12035 (self.op.node_name,
12036 utils.CommaJoin(self.instance_names),
12037 utils.CommaJoin(owned_instances)))
12039 if self.instance_names:
12040 self.LogInfo("Evacuating instances from node '%s': %s",
12041 self.op.node_name,
12042 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12043 else:
12044 self.LogInfo("No instances to evacuate from node '%s'",
12045 self.op.node_name)
12047 if self.op.remote_node is not None:
12048 for i in self.instances:
12049 if i.primary_node == self.op.remote_node:
12050 raise errors.OpPrereqError("Node %s is the primary node of"
12051 " instance %s, cannot use it as"
12053 (self.op.remote_node, i.name),
12054 errors.ECODE_INVAL)
12056 def Exec(self, feedback_fn):
12057 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12059 if not self.instance_names:
12060 # No instances to evacuate
12061 jobs = []
12063 elif self.op.iallocator is not None:
12064 # TODO: Implement relocation to other group
12065 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12066 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12067 instances=list(self.instance_names))
12068 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12070 ial.Run(self.op.iallocator)
12072 if not ial.success:
12073 raise errors.OpPrereqError("Can't compute node evacuation using"
12074 " iallocator '%s': %s" %
12075 (self.op.iallocator, ial.info),
12076 errors.ECODE_NORES)
12078 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12080 elif self.op.remote_node is not None:
12081 assert self.op.mode == constants.NODE_EVAC_SEC
12082 jobs = [
12083 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12084 remote_node=self.op.remote_node,
12085 disks=[],
12086 mode=constants.REPLACE_DISK_CHG,
12087 early_release=self.op.early_release)]
12088 for instance_name in self.instance_names]
12091 raise errors.ProgrammerError("No iallocator or remote node")
12093 return ResultWithJobs(jobs)
12096 def _SetOpEarlyRelease(early_release, op):
12097 """Sets C{early_release} flag on opcodes if available.
12101 op.early_release = early_release
12102 except AttributeError:
12103 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12105 return op
12108 def _NodeEvacDest(use_nodes, group, nodes):
12109 """Returns group or nodes depending on caller's choice.
12113 return utils.CommaJoin(nodes)
12118 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12119 """Unpacks the result of change-group and node-evacuate iallocator requests.
12121 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12122 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12124 @type lu: L{LogicalUnit}
12125 @param lu: Logical unit instance
12126 @type alloc_result: tuple/list
12127 @param alloc_result: Result from iallocator
12128 @type early_release: bool
12129 @param early_release: Whether to release locks early if possible
12130 @type use_nodes: bool
12131 @param use_nodes: Whether to display node names instead of groups
12133 """
12134 (moved, failed, jobs) = alloc_result
12136 if failed:
12137 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12138 for (name, reason) in failed)
12139 lu.LogWarning("Unable to evacuate instances %s", failreason)
12140 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12143 lu.LogInfo("Instances to be moved: %s",
12144 utils.CommaJoin("%s (to %s)" %
12145 (name, _NodeEvacDest(use_nodes, group, nodes))
12146 for (name, group, nodes) in moved))
12148 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12149 map(opcodes.OpCode.LoadOpCode, ops))
12150 for ops in jobs]
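# Illustrative example (an assumption about a typical payload, not from the
# original source): for a node-evacuate request, alloc_result as unpacked
# above might look like
#   ([("inst1", "group1", ["node3"])],     # moved
#    [("inst2", "insufficient memory")],   # failed
#    [[opcode_dict]])                      # jobs, serialized opcodes
# and the return value is one job list per entry in C{jobs}, with every
# opcode deserialized and its early_release flag set where supported.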
12153 def _DiskSizeInBytesToMebibytes(lu, size):
12154 """Converts a disk size in bytes to mebibytes.
12156 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12158 """
12159 (mib, remainder) = divmod(size, 1024 * 1024)
12162 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12163 " to not overwrite existing data (%s bytes will not be"
12164 " wiped)", (1024 * 1024) - remainder)
12170 class LUInstanceGrowDisk(LogicalUnit):
12171 """Grow a disk of an instance.
12174 HPATH = "disk-grow"
12175 HTYPE = constants.HTYPE_INSTANCE
12178 def ExpandNames(self):
12179 self._ExpandAndLockInstance()
12180 self.needed_locks[locking.LEVEL_NODE] = []
12181 self.needed_locks[locking.LEVEL_NODE_RES] = []
12182 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12183 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12185 def DeclareLocks(self, level):
12186 if level == locking.LEVEL_NODE:
12187 self._LockInstancesNodes()
12188 elif level == locking.LEVEL_NODE_RES:
12190 self.needed_locks[locking.LEVEL_NODE_RES] = \
12191 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12193 def BuildHooksEnv(self):
12194 """Build hooks env.
12196 This runs on the master, the primary and all the secondaries.
12198 """
12199 env = {
12200 "DISK": self.op.disk,
12201 "AMOUNT": self.op.amount,
12202 "ABSOLUTE": self.op.absolute,
12203 }
12204 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12205 return env
12207 def BuildHooksNodes(self):
12208 """Build hooks nodes.
12211 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12214 def CheckPrereq(self):
12215 """Check prerequisites.
12217 This checks that the instance is in the cluster.
12219 """
12220 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12221 assert instance is not None, \
12222 "Cannot retrieve locked instance %s" % self.op.instance_name
12223 nodenames = list(instance.all_nodes)
12224 for node in nodenames:
12225 _CheckNodeOnline(self, node)
12227 self.instance = instance
12229 if instance.disk_template not in constants.DTS_GROWABLE:
12230 raise errors.OpPrereqError("Instance's disk layout does not support"
12231 " growing", errors.ECODE_INVAL)
12233 self.disk = instance.FindDisk(self.op.disk)
12235 if self.op.absolute:
12236 self.target = self.op.amount
12237 self.delta = self.target - self.disk.size
12239 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12240 "current disk size (%s)" %
12241 (utils.FormatUnit(self.target, "h"),
12242 utils.FormatUnit(self.disk.size, "h")),
12243 errors.ECODE_STATE)
12244 else:
12245 self.delta = self.op.amount
12246 self.target = self.disk.size + self.delta
12248 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12249 utils.FormatUnit(self.delta, "h"),
12250 errors.ECODE_INVAL)
12252 if instance.disk_template not in (constants.DT_FILE,
12253 constants.DT_SHARED_FILE,
12254 constants.DT_RBD):
12255 # TODO: check the free disk space for file, when that feature will be
12256 # supported
12257 _CheckNodesFreeDiskPerVG(self, nodenames,
12258 self.disk.ComputeGrowth(self.delta))
12260 def Exec(self, feedback_fn):
12261 """Execute disk grow.
12264 instance = self.instance
12267 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12268 assert (self.owned_locks(locking.LEVEL_NODE) ==
12269 self.owned_locks(locking.LEVEL_NODE_RES))
12271 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12273 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12275 raise errors.OpExecError("Cannot activate block device to grow")
12277 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12278 (self.op.disk, instance.name,
12279 utils.FormatUnit(self.delta, "h"),
12280 utils.FormatUnit(self.target, "h")))
12282 # First run all grow ops in dry-run mode
12283 for node in instance.all_nodes:
12284 self.cfg.SetDiskID(disk, node)
12285 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12286 True, True)
12287 result.Raise("Dry-run grow request failed to node %s" % node)
12289 if wipe_disks:
12290 # Get disk size from primary node for wiping
12291 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12292 result.Raise("Failed to retrieve disk size from node '%s'" %
12293 instance.primary_node)
12295 (disk_size_in_bytes, ) = result.payload
12297 if disk_size_in_bytes is None:
12298 raise errors.OpExecError("Failed to retrieve disk size from primary"
12299 " node '%s'" % instance.primary_node)
12301 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12303 assert old_disk_size >= disk.size, \
12304 ("Retrieved disk size too small (got %s, should be at least %s)" %
12305 (old_disk_size, disk.size))
12306 else:
12307 old_disk_size = None
12309 # We know that (as far as we can test) operations across different
12310 # nodes will succeed, time to run it for real on the backing storage
12311 for node in instance.all_nodes:
12312 self.cfg.SetDiskID(disk, node)
12313 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12314 False, True)
12315 result.Raise("Grow request failed to node %s" % node)
12317 # And now execute it for logical storage, on the primary node
12318 node = instance.primary_node
12319 self.cfg.SetDiskID(disk, node)
12320 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12321 False, False)
12322 result.Raise("Grow request failed to node %s" % node)
12324 disk.RecordGrow(self.delta)
12325 self.cfg.Update(instance, feedback_fn)
12327 # Changes have been recorded, release node lock
12328 _ReleaseLocks(self, locking.LEVEL_NODE)
12330 # Downgrade lock while waiting for sync
12331 self.glm.downgrade(locking.LEVEL_INSTANCE)
12333 assert wipe_disks ^ (old_disk_size is None)
12335 if wipe_disks:
12336 assert instance.disks[self.op.disk] == disk
12338 # Wipe newly added disk space
12339 _WipeDisks(self, instance,
12340 disks=[(self.op.disk, disk, old_disk_size)])
12342 if self.op.wait_for_sync:
12343 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12345 self.LogWarning("Disk syncing has not returned a good status; check"
12347 if instance.admin_state != constants.ADMINST_UP:
12348 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12349 elif instance.admin_state != constants.ADMINST_UP:
12350 self.LogWarning("Not shutting down the disk even if the instance is"
12351 " not supposed to be running because no wait for"
12352 " sync mode was requested")
12354 assert self.owned_locks(locking.LEVEL_NODE_RES)
12355 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12358 class LUInstanceQueryData(NoHooksLU):
12359 """Query runtime instance data.
12364 def ExpandNames(self):
12365 self.needed_locks = {}
12367 # Use locking if requested or when non-static information is wanted
12368 if not (self.op.static or self.op.use_locking):
12369 self.LogWarning("Non-static data requested, locks need to be acquired")
12370 self.op.use_locking = True
12372 if self.op.instances or not self.op.use_locking:
12373 # Expand instance names right here
12374 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12375 else:
12376 # Will use acquired locks
12377 self.wanted_names = None
12379 if self.op.use_locking:
12380 self.share_locks = _ShareAll()
12382 if self.wanted_names is None:
12383 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12384 else:
12385 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12387 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12388 self.needed_locks[locking.LEVEL_NODE] = []
12389 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12391 def DeclareLocks(self, level):
12392 if self.op.use_locking:
12393 if level == locking.LEVEL_NODEGROUP:
12394 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12396 # Lock all groups used by instances optimistically; this requires going
12397 # via the node before it's locked, requiring verification later on
12398 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12399 frozenset(group_uuid
12400 for instance_name in owned_instances
12401 for group_uuid in
12402 self.cfg.GetInstanceNodeGroups(instance_name))
12404 elif level == locking.LEVEL_NODE:
12405 self._LockInstancesNodes()
12407 def CheckPrereq(self):
12408 """Check prerequisites.
12410 This only checks the optional instance list against the existing names.
12412 """
12413 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12414 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12415 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12417 if self.wanted_names is None:
12418 assert self.op.use_locking, "Locking was not used"
12419 self.wanted_names = owned_instances
12421 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12423 if self.op.use_locking:
12424 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12425 None)
12426 else:
12427 assert not (owned_instances or owned_groups or owned_nodes)
12429 self.wanted_instances = instances.values()
12431 def _ComputeBlockdevStatus(self, node, instance, dev):
12432 """Returns the status of a block device
12435 if self.op.static or not node:
12438 self.cfg.SetDiskID(dev, node)
12440 result = self.rpc.call_blockdev_find(node, dev)
12444 result.Raise("Can't compute disk status for %s" % instance.name)
12446 status = result.payload
12450 return (status.dev_path, status.major, status.minor,
12451 status.sync_percent, status.estimated_time,
12452 status.is_degraded, status.ldisk_status)
12454 def _ComputeDiskStatus(self, instance, snode, dev):
12455 """Compute block device status.
12458 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12460 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12462 def _ComputeDiskStatusInner(self, instance, snode, dev):
12463 """Compute block device status.
12465 @attention: The device has to be annotated already.
12467 """
12468 if dev.dev_type in constants.LDS_DRBD:
12469 # we change the snode then (otherwise we use the one passed in)
12470 if dev.logical_id[0] == instance.primary_node:
12471 snode = dev.logical_id[1]
12472 else:
12473 snode = dev.logical_id[0]
12475 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12476 instance, dev)
12477 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12479 if dev.children:
12480 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12481 instance, snode),
12482 dev.children)
12483 else:
12484 dev_children = []
12486 return {
12487 "iv_name": dev.iv_name,
12488 "dev_type": dev.dev_type,
12489 "logical_id": dev.logical_id,
12490 "physical_id": dev.physical_id,
12491 "pstatus": dev_pstatus,
12492 "sstatus": dev_sstatus,
12493 "children": dev_children,
12498 def Exec(self, feedback_fn):
12499 """Gather and return data"""
12500 result = {}
12502 cluster = self.cfg.GetClusterInfo()
12504 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12505 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12507 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12508 for node in nodes.values()))
12510 group2name_fn = lambda uuid: groups[uuid].name
12512 for instance in self.wanted_instances:
12513 pnode = nodes[instance.primary_node]
12515 if self.op.static or pnode.offline:
12516 remote_state = None
12518 self.LogWarning("Primary node %s is marked offline, returning static"
12519 " information only for instance %s" %
12520 (pnode.name, instance.name))
12521 else:
12522 remote_info = self.rpc.call_instance_info(instance.primary_node,
12523 instance.name,
12524 instance.hypervisor)
12525 remote_info.Raise("Error checking node %s" % instance.primary_node)
12526 remote_info = remote_info.payload
12527 if remote_info and "state" in remote_info:
12528 remote_state = "up"
12529 else:
12530 if instance.admin_state == constants.ADMINST_UP:
12531 remote_state = "down"
12532 else:
12533 remote_state = instance.admin_state
12535 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12536 instance.disks)
12538 snodes_group_uuids = [nodes[snode_name].group
12539 for snode_name in instance.secondary_nodes]
12541 result[instance.name] = {
12542 "name": instance.name,
12543 "config_state": instance.admin_state,
12544 "run_state": remote_state,
12545 "pnode": instance.primary_node,
12546 "pnode_group_uuid": pnode.group,
12547 "pnode_group_name": group2name_fn(pnode.group),
12548 "snodes": instance.secondary_nodes,
12549 "snodes_group_uuids": snodes_group_uuids,
12550 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12552 # this happens to be the same format used for hooks
12553 "nics": _NICListToTuple(self, instance.nics),
12554 "disk_template": instance.disk_template,
12556 "hypervisor": instance.hypervisor,
12557 "network_port": instance.network_port,
12558 "hv_instance": instance.hvparams,
12559 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12560 "be_instance": instance.beparams,
12561 "be_actual": cluster.FillBE(instance),
12562 "os_instance": instance.osparams,
12563 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12564 "serial_no": instance.serial_no,
12565 "mtime": instance.mtime,
12566 "ctime": instance.ctime,
12567 "uuid": instance.uuid,
12573 def PrepareContainerMods(mods, private_fn):
12574 """Prepares a list of container modifications by adding a private data field.
12576 @type mods: list of tuples; (operation, index, parameters)
12577 @param mods: List of modifications
12578 @type private_fn: callable or None
12579 @param private_fn: Callable for constructing a private data field for a
12580 modification
12581 @rtype: list
12583 """
12584 if private_fn is None:
12585 fn = lambda: None
12586 else:
12587 fn = private_fn
12589 return [(op, idx, params, fn()) for (op, idx, params) in mods]
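# Illustrative sketch (not from the original source): LUInstanceSetParams
# uses this helper roughly as
#   self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
#   self.diskmod = PrepareContainerMods(self.op.disks, None)
# turning each (op, idx, params) tuple into (op, idx, params, private), where
# private is a fresh _InstNicModPrivate object or None respectively.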
12592 #: Type description for changes as returned by L{ApplyContainerMods}'s
12593 #: callbacks
12594 _TApplyContModsCbChanges = \
12595 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12596 ht.TNonEmptyString,
12597 ht.TAny,
12598 ])))
12601 def ApplyContainerMods(kind, container, chgdesc, mods,
12602 create_fn, modify_fn, remove_fn):
12603 """Applies descriptions in C{mods} to C{container}.
12605 @type kind: string
12606 @param kind: One-word item description
12607 @type container: list
12608 @param container: Container to modify
12609 @type chgdesc: None or list
12610 @param chgdesc: List of applied changes
12611 @type mods: list
12612 @param mods: Modifications as returned by L{PrepareContainerMods}
12613 @type create_fn: callable
12614 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12615 receives absolute item index, parameters and private data object as added
12616 by L{PrepareContainerMods}, returns tuple containing new item and changes
12617 applied
12618 @type modify_fn: callable
12619 @param modify_fn: Callback for modifying an existing item
12620 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12621 and private data object as added by L{PrepareContainerMods}, returns
12622 changes applied
12623 @type remove_fn: callable
12624 @param remove_fn: Callback on removing item; receives absolute item index,
12625 item and private data object as added by L{PrepareContainerMods}
12627 """
12628 for (op, idx, params, private) in mods:
12629 if idx == -1:
12630 # Append
12631 absidx = len(container) - 1
12633 raise IndexError("Not accepting negative indices other than -1")
12634 elif idx > len(container):
12635 raise IndexError("Got %s index %s, but there are only %s" %
12636 (kind, idx, len(container)))
12637 else:
12638 absidx = idx
12640 changes = None
12642 if op == constants.DDM_ADD:
12643 # Calculate where item will be added
12644 if idx == -1:
12645 addidx = len(container)
12646 else:
12647 addidx = idx
12649 if create_fn is None:
12650 item = params
12651 else:
12652 (item, changes) = create_fn(addidx, params, private)
12654 if idx == -1:
12655 container.append(item)
12656 else:
12658 assert idx <= len(container)
12659 # list.insert does so before the specified index
12660 container.insert(idx, item)
12661 else:
12662 # Retrieve existing item
12663 try:
12664 item = container[absidx]
12665 except IndexError:
12666 raise IndexError("Invalid %s index %s" % (kind, idx))
12668 if op == constants.DDM_REMOVE:
12669 assert not params
12671 if remove_fn is not None:
12672 remove_fn(absidx, item, private)
12674 changes = [("%s/%s" % (kind, absidx), "remove")]
12676 assert container[absidx] == item
12677 del container[absidx]
12678 elif op == constants.DDM_MODIFY:
12679 if modify_fn is not None:
12680 changes = modify_fn(absidx, item, params, private)
12682 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12684 assert _TApplyContModsCbChanges(changes)
12686 if not (chgdesc is None or changes is None):
12687 chgdesc.extend(changes)
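# Illustrative example (an assumption, not from the original source): a
# single "add" modification with index -1 appends to the container:
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, params)], None)
#   ApplyContainerMods("nic", nics, chgdesc, mods,
#                      create_fn, modify_fn, remove_fn)
# afterwards the item built by create_fn sits at nics[-1] and chgdesc has
# been extended with the changes reported by create_fn.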
12690 def _UpdateIvNames(base_index, disks):
12691 """Updates the C{iv_name} attribute of disks.
12693 @type disks: list of L{objects.Disk}
12695 """
12696 for (idx, disk) in enumerate(disks):
12697 disk.iv_name = "disk/%s" % (base_index + idx, )
12700 class _InstNicModPrivate:
12701 """Data structure for network interface modifications.
12703 Used by L{LUInstanceSetParams}.
12705 """
12706 def __init__(self):
12707 self.params = None
12708 self.filled = None
12711 class LUInstanceSetParams(LogicalUnit):
12712 """Modifies an instances's parameters.
12715 HPATH = "instance-modify"
12716 HTYPE = constants.HTYPE_INSTANCE
12720 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12721 assert ht.TList(mods)
12722 assert not mods or len(mods[0]) in (2, 3)
12724 if mods and len(mods[0]) == 2:
12725 result = []
12727 addremove = 0
12728 for op, params in mods:
12729 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12730 result.append((op, -1, params))
12734 raise errors.OpPrereqError("Only one %s add or remove operation is"
12735 " supported at a time" % kind,
12736 errors.ECODE_INVAL)
12737 else:
12738 result.append((constants.DDM_MODIFY, op, params))
12740 assert verify_fn(result)
12741 else:
12742 result = mods
12744 return result
12746 @staticmethod
12747 def _CheckMods(kind, mods, key_types, item_fn):
12748 """Ensures requested disk/NIC modifications are valid.
12751 for (op, _, params) in mods:
12752 assert ht.TDict(params)
12754 utils.ForceDictType(params, key_types)
12756 if op == constants.DDM_REMOVE:
12758 raise errors.OpPrereqError("No settings should be passed when"
12759 " removing a %s" % kind,
12760 errors.ECODE_INVAL)
12761 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12762 item_fn(op, params)
12764 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12766 @staticmethod
12767 def _VerifyDiskModification(op, params):
12768 """Verifies a disk modification.
12771 if op == constants.DDM_ADD:
12772 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12773 if mode not in constants.DISK_ACCESS_SET:
12774 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12775 errors.ECODE_INVAL)
12777 size = params.get(constants.IDISK_SIZE, None)
12779 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12780 constants.IDISK_SIZE, errors.ECODE_INVAL)
12782 try:
12783 size = int(size)
12784 except (TypeError, ValueError), err:
12785 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12786 errors.ECODE_INVAL)
12788 params[constants.IDISK_SIZE] = size
12790 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12791 raise errors.OpPrereqError("Disk size change not possible, use"
12792 " grow-disk", errors.ECODE_INVAL)
12794 @staticmethod
12795 def _VerifyNicModification(op, params):
12796 """Verifies a network interface modification.
12799 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12800 ip = params.get(constants.INIC_IP, None)
12801 req_net = params.get(constants.INIC_NETWORK, None)
12802 link = params.get(constants.NIC_LINK, None)
12803 mode = params.get(constants.NIC_MODE, None)
12804 if req_net is not None:
12805 if req_net.lower() == constants.VALUE_NONE:
12806 params[constants.INIC_NETWORK] = None
12807 req_net = None
12808 elif link is not None or mode is not None:
12809 raise errors.OpPrereqError("If network is given"
12810 " mode or link should not",
12811 errors.ECODE_INVAL)
12813 if op == constants.DDM_ADD:
12814 macaddr = params.get(constants.INIC_MAC, None)
12815 if macaddr is None:
12816 params[constants.INIC_MAC] = constants.VALUE_AUTO
12818 if ip is not None:
12819 if ip.lower() == constants.VALUE_NONE:
12820 params[constants.INIC_IP] = None
12821 else:
12822 if ip.lower() == constants.NIC_IP_POOL:
12823 if op == constants.DDM_ADD and req_net is None:
12824 raise errors.OpPrereqError("If ip=pool, parameter network"
12826 errors.ECODE_INVAL)
12828 elif not netutils.IPAddress.IsValid(ip):
12829 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12830 errors.ECODE_INVAL)
12832 if constants.INIC_MAC in params:
12833 macaddr = params[constants.INIC_MAC]
12834 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12835 macaddr = utils.NormalizeAndValidateMac(macaddr)
12837 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12838 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12839 " modifying an existing NIC",
12840 errors.ECODE_INVAL)
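# Example (illustrative, "mynet" is a hypothetical network name): adding
# a NIC with an IP from a pool requires a network, and a missing MAC
# defaults to "auto":
#
#   params = {constants.INIC_IP: constants.NIC_IP_POOL,
#             constants.INIC_NETWORK: "mynet"}
#   LUInstanceSetParams._VerifyNicModification(constants.DDM_ADD, params)
#   # params[constants.INIC_MAC] == constants.VALUE_AUTO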
12842 def CheckArguments(self):
12843 if not (self.op.nics or self.op.disks or self.op.disk_template or
12844 self.op.hvparams or self.op.beparams or self.op.os_name or
12845 self.op.offline is not None or self.op.runtime_mem):
12846 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12848 if self.op.hvparams:
12849 _CheckGlobalHvParams(self.op.hvparams)
12851 self.op.disks = self._UpgradeDiskNicMods(
12852 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12853 self.op.nics = self._UpgradeDiskNicMods(
12854 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12856 # Check disk modifications
12857 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12858 self._VerifyDiskModification)
12860 if self.op.disks and self.op.disk_template is not None:
12861 raise errors.OpPrereqError("Disk template conversion and other disk"
12862 " changes not supported at the same time",
12863 errors.ECODE_INVAL)
12865 if (self.op.disk_template and
12866 self.op.disk_template in constants.DTS_INT_MIRROR and
12867 self.op.remote_node is None):
12868 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12869 " one requires specifying a secondary node",
12870 errors.ECODE_INVAL)
12872 # Check NIC modifications
12873 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12874 self._VerifyNicModification)
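# Illustrative opcode usage (hypothetical names and values): a single
# OpInstanceSetParams can combine disk and backend parameter changes, e.g.
#
#   op = opcodes.OpInstanceSetParams(
#     instance_name="inst1.example.com",
#     disks=[(constants.DDM_ADD, {constants.IDISK_SIZE: 2048})],
#     beparams={constants.BE_MAXMEM: 4096})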
12876 def ExpandNames(self):
12877 self._ExpandAndLockInstance()
12878 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12879 # Can't even acquire node locks in shared mode as upcoming changes in
12880 # Ganeti 2.6 will start to modify the node object on disk conversion
12881 self.needed_locks[locking.LEVEL_NODE] = []
12882 self.needed_locks[locking.LEVEL_NODE_RES] = []
12883 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12884 # Lock the node group to look up the ipolicy
12885 self.share_locks[locking.LEVEL_NODEGROUP] = 1
12887 def DeclareLocks(self, level):
12888 if level == locking.LEVEL_NODEGROUP:
12889 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12890 # Acquire locks for the instance's nodegroups optimistically. Needs
12891 # to be verified in CheckPrereq
12892 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12893 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12894 elif level == locking.LEVEL_NODE:
12895 self._LockInstancesNodes()
12896 if self.op.disk_template and self.op.remote_node:
12897 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12898 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12899 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12901 self.needed_locks[locking.LEVEL_NODE_RES] = \
12902 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12904 def BuildHooksEnv(self):
12905 """Build hooks env.
12907 This runs on the master, primary and secondaries.
12910 args = {}
12911 if constants.BE_MINMEM in self.be_new:
12912 args["minmem"] = self.be_new[constants.BE_MINMEM]
12913 if constants.BE_MAXMEM in self.be_new:
12914 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12915 if constants.BE_VCPUS in self.be_new:
12916 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12917 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12918 # information at all.
12920 if self._new_nics is not None:
12922 nics = []
12923 for nic in self._new_nics:
12924 n = copy.deepcopy(nic)
12925 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12926 n.nicparams = nicparams
12927 nics.append(_NICToTuple(self, n))
12929 args["nics"] = nics
12931 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12932 if self.op.disk_template:
12933 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12934 if self.op.runtime_mem:
12935 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12939 def BuildHooksNodes(self):
12940 """Build hooks nodes.
12943 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12944 return (nl, nl)
12946 def _PrepareNicModification(self, params, private, old_ip, old_net,
12947 old_params, cluster, pnode):
12949 update_params_dict = dict([(key, params[key])
12950 for key in constants.NICS_PARAMETERS
12951 if key in params])
12953 req_link = update_params_dict.get(constants.NIC_LINK, None)
12954 req_mode = update_params_dict.get(constants.NIC_MODE, None)
12956 new_net = params.get(constants.INIC_NETWORK, old_net)
12957 if new_net is not None:
12958 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
12959 if netparams is None:
12960 raise errors.OpPrereqError("No netparams found for the network"
12961 " %s, probably not connected" % new_net,
12962 errors.ECODE_INVAL)
12963 new_params = dict(netparams)
12964 else:
12965 new_params = _GetUpdatedParams(old_params, update_params_dict)
12967 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12969 new_filled_params = cluster.SimpleFillNIC(new_params)
12970 objects.NIC.CheckParameterSyntax(new_filled_params)
12972 new_mode = new_filled_params[constants.NIC_MODE]
12973 if new_mode == constants.NIC_MODE_BRIDGED:
12974 bridge = new_filled_params[constants.NIC_LINK]
12975 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12976 if msg:
12977 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12978 if self.op.force:
12979 self.warn.append(msg)
12980 else:
12981 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12983 elif new_mode == constants.NIC_MODE_ROUTED:
12984 ip = params.get(constants.INIC_IP, old_ip)
12985 if ip is None:
12986 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12987 " on a routed NIC", errors.ECODE_INVAL)
12989 if constants.INIC_MAC in params:
12990 mac = params[constants.INIC_MAC]
12991 if mac is None:
12992 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12993 errors.ECODE_INVAL)
12994 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12995 # otherwise generate the MAC address
12996 params[constants.INIC_MAC] = \
12997 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12999 # or validate/reserve the current one
13000 try:
13001 self.cfg.ReserveMAC(mac, self.proc.GetECId())
13002 except errors.ReservationError:
13003 raise errors.OpPrereqError("MAC address '%s' already in use"
13004 " in cluster" % mac,
13005 errors.ECODE_NOTUNIQUE)
13006 elif new_net != old_net:
13008 def get_net_prefix(net):
13009 if net:
13010 uuid = self.cfg.LookupNetwork(net)
13011 if uuid:
13012 nobj = self.cfg.GetNetwork(uuid)
13013 return nobj.mac_prefix
13014 return None
13016 new_prefix = get_net_prefix(new_net)
13017 old_prefix = get_net_prefix(old_net)
13018 if old_prefix != new_prefix:
13019 params[constants.INIC_MAC] = \
13020 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13022 # if there is a change in NIC/network configuration
13023 new_ip = params.get(constants.INIC_IP, old_ip)
13024 if (new_ip, new_net) != (old_ip, old_net):
13027 if new_ip.lower() == constants.NIC_IP_POOL:
13028 try:
13029 new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
13030 except errors.ReservationError:
13031 raise errors.OpPrereqError("Unable to get a free IP"
13032 " from the address pool",
13033 errors.ECODE_STATE)
13034 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
13035 params[constants.INIC_IP] = new_ip
13036 elif new_ip != old_ip or new_net != old_net:
13037 try:
13038 self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
13039 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
13040 except errors.ReservationError:
13041 raise errors.OpPrereqError("IP %s not available in network %s" %
13043 errors.ECODE_NOTUNIQUE)
13044 elif new_ip.lower() == constants.NIC_IP_POOL:
13045 raise errors.OpPrereqError("ip=pool, but no network found",
13046 errors.ECODE_INVAL)
13049 if self.op.conflicts_check:
13050 _CheckForConflictingIp(self, new_ip, pnode)
13052 # release old IP if old network is not None
13053 if old_ip and old_net:
13054 try:
13055 self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
13056 except errors.AddressPoolError:
13057 logging.warning("Release IP %s not contained in network %s",
13058 old_ip, old_net)
13060 # there are no changes in (net, ip) tuple
13061 elif (old_net is not None and
13062 (req_link is not None or req_mode is not None)):
13063 raise errors.OpPrereqError("Not allowed to change link or mode of"
13064 " a NIC that is connected to a network",
13065 errors.ECODE_INVAL)
13067 private.params = new_params
13068 private.filled = new_filled_params
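# Illustrative note: "private" is the _InstNicModPrivate container created
# by PrepareContainerMods; after this method runs it carries the requested
# NIC parameters in private.params and the cluster-filled version in
# private.filled (e.g. private.filled[constants.NIC_MODE] is always set).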
13070 def CheckPrereq(self):
13071 """Check prerequisites.
13073 This only checks the instance list against the existing names.
13076 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13077 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13079 cluster = self.cluster = self.cfg.GetClusterInfo()
13080 assert self.instance is not None, \
13081 "Cannot retrieve locked instance %s" % self.op.instance_name
13083 pnode = instance.primary_node
13084 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13085 nodelist = list(instance.all_nodes)
13086 pnode_info = self.cfg.GetNodeInfo(pnode)
13087 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13089 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13090 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13091 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13093 # dictionary with instance information after the modification
13094 ispec = {}
13096 # Prepare disk/NIC modifications
13097 self.diskmod = PrepareContainerMods(self.op.disks, None)
13098 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13101 if self.op.os_name and not self.op.force:
13102 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13103 self.op.force_variant)
13104 instance_os = self.op.os_name
13105 else:
13106 instance_os = instance.os
13108 assert not (self.op.disk_template and self.op.disks), \
13109 "Can't modify disk template and apply disk changes at the same time"
13111 if self.op.disk_template:
13112 if instance.disk_template == self.op.disk_template:
13113 raise errors.OpPrereqError("Instance already has disk template %s" %
13114 instance.disk_template, errors.ECODE_INVAL)
13116 if (instance.disk_template,
13117 self.op.disk_template) not in self._DISK_CONVERSIONS:
13118 raise errors.OpPrereqError("Unsupported disk template conversion from"
13119 " %s to %s" % (instance.disk_template,
13120 self.op.disk_template),
13121 errors.ECODE_INVAL)
13122 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13123 msg="cannot change disk template")
13124 if self.op.disk_template in constants.DTS_INT_MIRROR:
13125 if self.op.remote_node == pnode:
13126 raise errors.OpPrereqError("Given new secondary node %s is the same"
13127 " as the primary node of the instance" %
13128 self.op.remote_node, errors.ECODE_STATE)
13129 _CheckNodeOnline(self, self.op.remote_node)
13130 _CheckNodeNotDrained(self, self.op.remote_node)
13131 # FIXME: here we assume that the old instance type is DT_PLAIN
13132 assert instance.disk_template == constants.DT_PLAIN
13133 disks = [{constants.IDISK_SIZE: d.size,
13134 constants.IDISK_VG: d.logical_id[0]}
13135 for d in instance.disks]
13136 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13137 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13139 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13140 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13141 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13142 snode_group)
13143 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13144 ignore=self.op.ignore_ipolicy)
13145 if pnode_info.group != snode_info.group:
13146 self.LogWarning("The primary and secondary nodes are in two"
13147 " different node groups; the disk parameters"
13148 " from the first disk's node group will be"
13151 # hvparams processing
13152 if self.op.hvparams:
13153 hv_type = instance.hypervisor
13154 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13155 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13156 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13159 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
13160 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13161 self.hv_proposed = self.hv_new = hv_new # the new actual values
13162 self.hv_inst = i_hvdict # the new dict (without defaults)
13163 else:
13164 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13165 instance.hvparams)
13166 self.hv_new = self.hv_inst = {}
13168 # beparams processing
13169 if self.op.beparams:
13170 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13171 use_none=True)
13172 objects.UpgradeBeParams(i_bedict)
13173 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13174 be_new = cluster.SimpleFillBE(i_bedict)
13175 self.be_proposed = self.be_new = be_new # the new actual values
13176 self.be_inst = i_bedict # the new dict (without defaults)
13177 else:
13178 self.be_new = self.be_inst = {}
13179 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13180 be_old = cluster.FillBE(instance)
13182 # CPU param validation -- checking every time a parameter is
13183 # changed to cover all cases where either the CPU mask or the vcpus
13184 # count has been changed
13185 if (constants.BE_VCPUS in self.be_proposed and
13186 constants.HV_CPU_MASK in self.hv_proposed):
13187 cpu_list = \
13188 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13189 # Verify mask is consistent with number of vCPUs. Can skip this
13190 # test if only 1 entry in the CPU mask, which means same mask
13191 # is applied to all vCPUs.
13192 if (len(cpu_list) > 1 and
13193 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13194 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13196 (self.be_proposed[constants.BE_VCPUS],
13197 self.hv_proposed[constants.HV_CPU_MASK]),
13198 errors.ECODE_INVAL)
13200 # Only perform this test if a new CPU mask is given
13201 if constants.HV_CPU_MASK in self.hv_new:
13202 # Calculate the largest CPU number requested
13203 max_requested_cpu = max(map(max, cpu_list))
13204 # Check that all of the instance's nodes have enough physical CPUs to
13205 # satisfy the requested CPU mask
13206 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13207 max_requested_cpu + 1, instance.hypervisor)
13209 # osparams processing
13210 if self.op.osparams:
13211 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13212 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13213 self.os_inst = i_osdict # the new dict (without defaults)
13214 else:
13215 self.os_inst = {}
13217 self.warn = []
13219 #TODO(dynmem): do the appropriate check involving MINMEM
13220 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13221 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13222 mem_check_list = [pnode]
13223 if be_new[constants.BE_AUTO_BALANCE]:
13224 # either we changed auto_balance to yes or it was from before
13225 mem_check_list.extend(instance.secondary_nodes)
13226 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13227 instance.hypervisor)
13228 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13229 [instance.hypervisor])
13230 pninfo = nodeinfo[pnode]
13231 msg = pninfo.fail_msg
13232 if msg:
13233 # Assume the primary node is unreachable and go ahead
13234 self.warn.append("Can't get info from primary node %s: %s" %
13235 (pnode, msg))
13236 else:
13237 (_, _, (pnhvinfo, )) = pninfo.payload
13238 if not isinstance(pnhvinfo.get("memory_free", None), int):
13239 self.warn.append("Node data from primary node %s doesn't contain"
13240 " free memory information" % pnode)
13241 elif instance_info.fail_msg:
13242 self.warn.append("Can't get instance runtime information: %s" %
13243 instance_info.fail_msg)
13245 if instance_info.payload:
13246 current_mem = int(instance_info.payload["memory"])
13247 else:
13248 # Assume instance not running
13249 # (there is a slight race condition here, but it's not very
13250 # probable, and we have no other way to check)
13251 # TODO: Describe race condition
13252 current_mem = 0
13253 #TODO(dynmem): do the appropriate check involving MINMEM
13254 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13255 pnhvinfo["memory_free"])
13257 raise errors.OpPrereqError("This change will prevent the instance"
13258 " from starting, due to %d MB of memory"
13259 " missing on its primary node" %
13260 miss_mem, errors.ECODE_NORES)
13262 if be_new[constants.BE_AUTO_BALANCE]:
13263 for node, nres in nodeinfo.items():
13264 if node not in instance.secondary_nodes:
13265 continue
13266 nres.Raise("Can't get info from secondary node %s" % node,
13267 prereq=True, ecode=errors.ECODE_STATE)
13268 (_, _, (nhvinfo, )) = nres.payload
13269 if not isinstance(nhvinfo.get("memory_free", None), int):
13270 raise errors.OpPrereqError("Secondary node %s didn't return free"
13271 " memory information" % node,
13272 errors.ECODE_STATE)
13273 #TODO(dynmem): do the appropriate check involving MINMEM
13274 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13275 raise errors.OpPrereqError("This change will prevent the instance"
13276 " from failover to its secondary node"
13277 " %s, due to not enough memory" % node,
13278 errors.ECODE_STATE)
13280 if self.op.runtime_mem:
13281 remote_info = self.rpc.call_instance_info(instance.primary_node,
13282 instance.name,
13283 instance.hypervisor)
13284 remote_info.Raise("Error checking node %s" % instance.primary_node)
13285 if not remote_info.payload: # not running already
13286 raise errors.OpPrereqError("Instance %s is not running" %
13287 instance.name, errors.ECODE_STATE)
13289 current_memory = remote_info.payload["memory"]
13290 if (not self.op.force and
13291 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13292 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13293 raise errors.OpPrereqError("Instance %s must have memory between %d"
13294 " and %d MB of memory unless --force is"
13297 self.be_proposed[constants.BE_MINMEM],
13298 self.be_proposed[constants.BE_MAXMEM]),
13299 errors.ECODE_INVAL)
13301 delta = self.op.runtime_mem - current_memory
13302 if delta > 0:
13303 _CheckNodeFreeMemory(self, instance.primary_node,
13304 "ballooning memory for instance %s" %
13305 instance.name, delta, instance.hypervisor)
13307 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13308 raise errors.OpPrereqError("Disk operations not supported for"
13309 " diskless instances", errors.ECODE_INVAL)
13311 def _PrepareNicCreate(_, params, private):
13312 self._PrepareNicModification(params, private, None, None,
13313 {}, cluster, pnode)
13314 return (None, None)
13316 def _PrepareNicMod(_, nic, params, private):
13317 self._PrepareNicModification(params, private, nic.ip, nic.network,
13318 nic.nicparams, cluster, pnode)
13319 return None
13321 def _PrepareNicRemove(_, params, __):
13322 ip = params.ip
13323 net = params.network
13324 if net is not None and ip is not None:
13325 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13327 # Verify NIC changes (operating on copy)
13328 nics = instance.nics[:]
13329 ApplyContainerMods("NIC", nics, None, self.nicmod,
13330 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13331 if len(nics) > constants.MAX_NICS:
13332 raise errors.OpPrereqError("Instance has too many network interfaces"
13333 " (%d), cannot add more" % constants.MAX_NICS,
13334 errors.ECODE_STATE)
13336 # Verify disk changes (operating on a copy)
13337 disks = instance.disks[:]
13338 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13339 if len(disks) > constants.MAX_DISKS:
13340 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13341 " more" % constants.MAX_DISKS,
13342 errors.ECODE_STATE)
13343 disk_sizes = [disk.size for disk in instance.disks]
13344 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13345 self.diskmod if op == constants.DDM_ADD)
13346 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13347 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13349 if self.op.offline is not None:
13350 if self.op.offline:
13351 msg = "can't change to offline"
13353 msg = "can't change to online"
13354 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13356 # Pre-compute NIC changes (necessary to use result in hooks)
13357 self._nic_chgdesc = []
13358 if self.nicmod:
13359 # Operate on copies as this is still in prereq
13360 nics = [nic.Copy() for nic in instance.nics]
13361 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13362 self._CreateNewNic, self._ApplyNicMods, None)
13363 self._new_nics = nics
13364 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13365 else:
13366 self._new_nics = None
13367 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13369 if not self.op.ignore_ipolicy:
13370 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13371 group_info)
13373 # Fill ispec with backend parameters
13374 ispec[constants.ISPEC_SPINDLE_USE] = \
13375 self.be_new.get(constants.BE_SPINDLE_USE, None)
13376 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13377 None)
13379 # Copy ispec to verify parameters with min/max values separately
13380 ispec_max = ispec.copy()
13381 ispec_max[constants.ISPEC_MEM_SIZE] = \
13382 self.be_new.get(constants.BE_MAXMEM, None)
13383 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13384 ispec_min = ispec.copy()
13385 ispec_min[constants.ISPEC_MEM_SIZE] = \
13386 self.be_new.get(constants.BE_MINMEM, None)
13387 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13389 if (res_max or res_min):
13390 # FIXME: Improve error message by including information about whether
13391 # the upper or lower limit of the parameter fails the ipolicy.
13392 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13393 (group_info, group_info.name,
13394 utils.CommaJoin(set(res_max + res_min))))
13395 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
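# Illustrative example (hypothetical numbers): with BE_MINMEM=512 and
# BE_MAXMEM=4096, the check above runs twice on copies of ispec, once
# with ISPEC_MEM_SIZE=4096 (upper bound) and once with ISPEC_MEM_SIZE=512
# (lower bound); a violation reported by either run aborts the operation.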
13397 def _ConvertPlainToDrbd(self, feedback_fn):
13398 """Converts an instance from plain to drbd.
13401 feedback_fn("Converting template to drbd")
13402 instance = self.instance
13403 pnode = instance.primary_node
13404 snode = self.op.remote_node
13406 assert instance.disk_template == constants.DT_PLAIN
13408 # create a fake disk info for _GenerateDiskTemplate
13409 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13410 constants.IDISK_VG: d.logical_id[0]}
13411 for d in instance.disks]
13412 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13413 instance.name, pnode, [snode],
13414 disk_info, None, None, 0, feedback_fn,
13415 self.diskparams)
13416 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13417 self.diskparams)
13418 info = _GetInstanceInfoText(instance)
13419 feedback_fn("Creating additional volumes...")
13420 # first, create the missing data and meta devices
13421 for disk in anno_disks:
13422 # unfortunately this is... not too nice
13423 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13424 info, True)
13425 for child in disk.children:
13426 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13427 # at this stage, all new LVs have been created, we can rename the
13428 # old ones
13429 feedback_fn("Renaming original volumes...")
13430 rename_list = [(o, n.children[0].logical_id)
13431 for (o, n) in zip(instance.disks, new_disks)]
13432 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13433 result.Raise("Failed to rename original LVs")
13435 feedback_fn("Initializing DRBD devices...")
13436 # all child devices are in place, we can now create the DRBD devices
13437 for disk in anno_disks:
13438 for node in [pnode, snode]:
13439 f_create = node == pnode
13440 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13442 # at this point, the instance has been modified
13443 instance.disk_template = constants.DT_DRBD8
13444 instance.disks = new_disks
13445 self.cfg.Update(instance, feedback_fn)
13447 # Release node locks while waiting for sync
13448 _ReleaseLocks(self, locking.LEVEL_NODE)
13450 # disks are created, waiting for sync
13451 disk_abort = not _WaitForSync(self, instance,
13452 oneshot=not self.op.wait_for_sync)
13453 if disk_abort:
13454 raise errors.OpExecError("There are some degraded disks for"
13455 " this instance, please cleanup manually")
13457 # Node resource locks will be released by caller
13459 def _ConvertDrbdToPlain(self, feedback_fn):
13460 """Converts an instance from drbd to plain.
13463 instance = self.instance
13465 assert len(instance.secondary_nodes) == 1
13466 assert instance.disk_template == constants.DT_DRBD8
13468 pnode = instance.primary_node
13469 snode = instance.secondary_nodes[0]
13470 feedback_fn("Converting template to plain")
13472 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13473 new_disks = [d.children[0] for d in instance.disks]
13475 # copy over size and mode
13476 for parent, child in zip(old_disks, new_disks):
13477 child.size = parent.size
13478 child.mode = parent.mode
13480 # this is a DRBD disk, return its port to the pool
13481 # NOTE: this must be done right before the call to cfg.Update!
13482 for disk in old_disks:
13483 tcp_port = disk.logical_id[2]
13484 self.cfg.AddTcpUdpPort(tcp_port)
13486 # update instance structure
13487 instance.disks = new_disks
13488 instance.disk_template = constants.DT_PLAIN
13489 self.cfg.Update(instance, feedback_fn)
13491 # Release locks in case removing disks takes a while
13492 _ReleaseLocks(self, locking.LEVEL_NODE)
13494 feedback_fn("Removing volumes on the secondary node...")
13495 for disk in old_disks:
13496 self.cfg.SetDiskID(disk, snode)
13497 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13498 if msg:
13499 self.LogWarning("Could not remove block device %s on node %s,"
13500 " continuing anyway: %s", disk.iv_name, snode, msg)
13502 feedback_fn("Removing unneeded volumes on the primary node...")
13503 for idx, disk in enumerate(old_disks):
13504 meta = disk.children[1]
13505 self.cfg.SetDiskID(meta, pnode)
13506 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13507 if msg:
13508 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13509 " continuing anyway: %s", idx, pnode, msg)
13511 def _CreateNewDisk(self, idx, params, _):
13512 """Creates a new disk.
13515 instance = self.instance
13518 if instance.disk_template in constants.DTS_FILEBASED:
13519 (file_driver, file_path) = instance.disks[0].logical_id
13520 file_path = os.path.dirname(file_path)
13521 else:
13522 file_driver = file_path = None
13524 disk = \
13525 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13526 instance.primary_node, instance.secondary_nodes,
13527 [params], file_path, file_driver, idx,
13528 self.Log, self.diskparams)[0]
13530 info = _GetInstanceInfoText(instance)
13532 logging.info("Creating volume %s for instance %s",
13533 disk.iv_name, instance.name)
13534 # Note: this needs to be kept in sync with _CreateDisks
13536 for node in instance.all_nodes:
13537 f_create = (node == instance.primary_node)
13538 try:
13539 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13540 except errors.OpExecError, err:
13541 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13542 disk.iv_name, disk, node, err)
13545 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13548 @staticmethod
13549 def _ModifyDisk(idx, disk, params, _):
13550 """Modifies a disk.
13553 disk.mode = params[constants.IDISK_MODE]
13556 ("disk.mode/%d" % idx, disk.mode),
13559 def _RemoveDisk(self, idx, root, _):
13560 """Removes a disk.
13563 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13564 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13565 self.cfg.SetDiskID(disk, node)
13566 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13567 if msg:
13568 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13569 " continuing anyway", idx, node, msg)
13571 # if this is a DRBD disk, return its port to the pool
13572 if root.dev_type in constants.LDS_DRBD:
13573 self.cfg.AddTcpUdpPort(root.logical_id[2])
13575 @staticmethod
13576 def _CreateNewNic(idx, params, private):
13577 """Creates data structure for a new network interface.
13580 mac = params[constants.INIC_MAC]
13581 ip = params.get(constants.INIC_IP, None)
13582 net = params.get(constants.INIC_NETWORK, None)
13583 #TODO: not private.filled?? can a nic have no nicparams??
13584 nicparams = private.filled
13586 return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
13587 ("nic.%d" % idx,
13588 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13589 (mac, ip, private.filled[constants.NIC_MODE],
13590 private.filled[constants.NIC_LINK],
13591 net)),
13592 ])
13594 @staticmethod
13595 def _ApplyNicMods(idx, nic, params, private):
13596 """Modifies a network interface.
13599 changes = []
13601 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13602 if key in params:
13603 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13604 setattr(nic, key, params[key])
13606 if private.filled:
13607 nic.nicparams = private.filled
13609 for (key, val) in nic.nicparams.items():
13610 changes.append(("nic.%s/%d" % (key, idx), val))
13612 return changes
13614 def Exec(self, feedback_fn):
13615 """Modifies an instance.
13617 All parameters take effect only at the next restart of the instance.
13620 # Process here the warnings from CheckPrereq, as we don't have a
13621 # feedback_fn there.
13622 # TODO: Replace with self.LogWarning
13623 for warn in self.warn:
13624 feedback_fn("WARNING: %s" % warn)
13626 assert ((self.op.disk_template is None) ^
13627 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13628 "Not owning any node resource locks"
13631 instance = self.instance
13632 result = []
13634 if self.op.runtime_mem:
13635 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13636 instance,
13637 self.op.runtime_mem)
13638 rpcres.Raise("Cannot modify instance runtime memory")
13639 result.append(("runtime_memory", self.op.runtime_mem))
13641 # Apply disk changes
13642 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13643 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13644 _UpdateIvNames(0, instance.disks)
13646 if self.op.disk_template:
13648 check_nodes = set(instance.all_nodes)
13649 if self.op.remote_node:
13650 check_nodes.add(self.op.remote_node)
13651 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13652 owned = self.owned_locks(level)
13653 assert not (check_nodes - owned), \
13654 ("Not owning the correct locks, owning %r, expected at least %r" %
13655 (owned, check_nodes))
13657 r_shut = _ShutdownInstanceDisks(self, instance)
13659 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13660 " proceed with disk template conversion")
13661 mode = (instance.disk_template, self.op.disk_template)
13662 try:
13663 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13664 except:
13665 self.cfg.ReleaseDRBDMinors(instance.name)
13666 raise
13667 result.append(("disk_template", self.op.disk_template))
13669 assert instance.disk_template == self.op.disk_template, \
13670 ("Expected disk template '%s', found '%s'" %
13671 (self.op.disk_template, instance.disk_template))
13673 # Release node and resource locks if there are any (they might already have
13674 # been released during disk conversion)
13675 _ReleaseLocks(self, locking.LEVEL_NODE)
13676 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13678 # Apply NIC changes
13679 if self._new_nics is not None:
13680 instance.nics = self._new_nics
13681 result.extend(self._nic_chgdesc)
13684 if self.op.hvparams:
13685 instance.hvparams = self.hv_inst
13686 for key, val in self.op.hvparams.iteritems():
13687 result.append(("hv/%s" % key, val))
13690 if self.op.beparams:
13691 instance.beparams = self.be_inst
13692 for key, val in self.op.beparams.iteritems():
13693 result.append(("be/%s" % key, val))
13696 if self.op.os_name:
13697 instance.os = self.op.os_name
13700 if self.op.osparams:
13701 instance.osparams = self.os_inst
13702 for key, val in self.op.osparams.iteritems():
13703 result.append(("os/%s" % key, val))
13705 if self.op.offline is None:
13706 # Ignore
13707 pass
13708 elif self.op.offline:
13709 # Mark instance as offline
13710 self.cfg.MarkInstanceOffline(instance.name)
13711 result.append(("admin_state", constants.ADMINST_OFFLINE))
13713 # Mark instance as online, but stopped
13714 self.cfg.MarkInstanceDown(instance.name)
13715 result.append(("admin_state", constants.ADMINST_DOWN))
13717 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13719 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13720 self.owned_locks(locking.LEVEL_NODE)), \
13721 "All node locks should have been released by now"
13725 _DISK_CONVERSIONS = {
13726 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13727 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13728 }
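# Illustrative note: Exec dispatches template conversions through this
# mapping, e.g.
#
#   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)
#
# so only plain<->drbd8 conversions are supported by this LU.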
13731 class LUInstanceChangeGroup(LogicalUnit):
13732 HPATH = "instance-change-group"
13733 HTYPE = constants.HTYPE_INSTANCE
13736 def ExpandNames(self):
13737 self.share_locks = _ShareAll()
13739 self.needed_locks = {
13740 locking.LEVEL_NODEGROUP: [],
13741 locking.LEVEL_NODE: [],
13742 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
13743 }
13745 self._ExpandAndLockInstance()
13747 if self.op.target_groups:
13748 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13749 self.op.target_groups)
13751 self.req_target_uuids = None
13753 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13755 def DeclareLocks(self, level):
13756 if level == locking.LEVEL_NODEGROUP:
13757 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13759 if self.req_target_uuids:
13760 lock_groups = set(self.req_target_uuids)
13762 # Lock all groups used by instance optimistically; this requires going
13763 # via the node before it's locked, requiring verification later on
13764 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13765 lock_groups.update(instance_groups)
13767 # No target groups, need to lock all of them
13768 lock_groups = locking.ALL_SET
13770 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13772 elif level == locking.LEVEL_NODE:
13773 if self.req_target_uuids:
13774 # Lock all nodes used by instances
13775 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13776 self._LockInstancesNodes()
13778 # Lock all nodes in all potential target groups
13779 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13780 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13781 member_nodes = [node_name
13782 for group in lock_groups
13783 for node_name in self.cfg.GetNodeGroup(group).members]
13784 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13785 else:
13786 # Lock all nodes as all groups are potential targets
13787 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13789 def CheckPrereq(self):
13790 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13791 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13792 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13794 assert (self.req_target_uuids is None or
13795 owned_groups.issuperset(self.req_target_uuids))
13796 assert owned_instances == set([self.op.instance_name])
13798 # Get instance information
13799 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13801 # Check if node groups for locked instance are still correct
13802 assert owned_nodes.issuperset(self.instance.all_nodes), \
13803 ("Instance %s's nodes changed while we kept the lock" %
13804 self.op.instance_name)
13806 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13807 owned_groups)
13809 if self.req_target_uuids:
13810 # User requested specific target groups
13811 self.target_uuids = frozenset(self.req_target_uuids)
13812 else:
13813 # All groups except those used by the instance are potential targets
13814 self.target_uuids = owned_groups - inst_groups
13816 conflicting_groups = self.target_uuids & inst_groups
13817 if conflicting_groups:
13818 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13819 " used by the instance '%s'" %
13820 (utils.CommaJoin(conflicting_groups),
13821 self.op.instance_name),
13822 errors.ECODE_INVAL)
13824 if not self.target_uuids:
13825 raise errors.OpPrereqError("There are no possible target groups",
13826 errors.ECODE_INVAL)
13828 def BuildHooksEnv(self):
13829 """Build hooks env.
13832 assert self.target_uuids
13834 env = {
13835 "TARGET_GROUPS": " ".join(self.target_uuids),
13836 }
13838 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13840 return env
13842 def BuildHooksNodes(self):
13843 """Build hooks nodes.
13846 mn = self.cfg.GetMasterNode()
13847 return ([mn], [mn])
13849 def Exec(self, feedback_fn):
13850 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13852 assert instances == [self.op.instance_name], "Instance not locked"
13854 req = iallocator.IAReqGroupChange(instances=instances,
13855 target_groups=list(self.target_uuids))
13856 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13858 ial.Run(self.op.iallocator)
13860 if not ial.success:
13861 raise errors.OpPrereqError("Can't compute solution for changing group of"
13862 " instance '%s' using iallocator '%s': %s" %
13863 (self.op.instance_name, self.op.iallocator,
13864 ial.info), errors.ECODE_NORES)
13866 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13868 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13869 " instance '%s'", len(jobs), self.op.instance_name)
13871 return ResultWithJobs(jobs)
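# Illustrative opcode usage (hypothetical names): moving an instance to
# any group other than its current one(s):
#
#   op = opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
#                                      target_groups=["group2"])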
13874 class LUBackupQuery(NoHooksLU):
13875 """Query the exports list
13880 def CheckArguments(self):
13881 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13882 ["node", "export"], self.op.use_locking)
13884 def ExpandNames(self):
13885 self.expq.ExpandNames(self)
13887 def DeclareLocks(self, level):
13888 self.expq.DeclareLocks(self, level)
13890 def Exec(self, feedback_fn):
13892 result = {}
13893 for (node, expname) in self.expq.OldStyleQuery(self):
13894 if expname is None:
13895 result[node] = False
13896 else:
13897 result.setdefault(node, []).append(expname)
13899 return result
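# Illustrative result shape (hypothetical node names):
#   {"node1.example.com": ["inst1.example.com"],
#    "node2.example.com": False}
# where False marks a node whose export list could not be queried.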
13902 class _ExportQuery(_QueryBase):
13903 FIELDS = query.EXPORT_FIELDS
13905 #: The node name is not a unique key for this query
13906 SORT_FIELD = "node"
13908 def ExpandNames(self, lu):
13909 lu.needed_locks = {}
13911 # The following variables interact with _QueryBase._GetNames
13912 if self.names:
13913 self.wanted = _GetWantedNodes(lu, self.names)
13914 else:
13915 self.wanted = locking.ALL_SET
13917 self.do_locking = self.use_locking
13919 if self.do_locking:
13920 lu.share_locks = _ShareAll()
13921 lu.needed_locks = {
13922 locking.LEVEL_NODE: self.wanted,
13923 }
13925 def DeclareLocks(self, lu, level):
13926 pass
13928 def _GetQueryData(self, lu):
13929 """Computes the list of nodes and their attributes.
13932 # Locking is not used
13934 assert not (compat.any(lu.glm.is_owned(level)
13935 for level in locking.LEVELS
13936 if level != locking.LEVEL_CLUSTER) or
13937 self.do_locking or self.use_locking)
13939 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13942 result = []
13943 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13944 if nres.fail_msg:
13945 result.append((node, None))
13946 else:
13947 result.extend((node, expname) for expname in nres.payload)
13949 return result
13952 class LUBackupPrepare(NoHooksLU):
13953 """Prepares an instance for an export and returns useful information.
13958 def ExpandNames(self):
13959 self._ExpandAndLockInstance()
13961 def CheckPrereq(self):
13962 """Check prerequisites.
13965 instance_name = self.op.instance_name
13967 self.instance = self.cfg.GetInstanceInfo(instance_name)
13968 assert self.instance is not None, \
13969 "Cannot retrieve locked instance %s" % self.op.instance_name
13970 _CheckNodeOnline(self, self.instance.primary_node)
13972 self._cds = _GetClusterDomainSecret()
13974 def Exec(self, feedback_fn):
13975 """Prepares an instance for an export.
13978 instance = self.instance
13980 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13981 salt = utils.GenerateSecret(8)
13983 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13984 result = self.rpc.call_x509_cert_create(instance.primary_node,
13985 constants.RIE_CERT_VALIDITY)
13986 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13988 (name, cert_pem) = result.payload
13990 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13991 cert_pem)
13994 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13995 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13997 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14003 class LUBackupExport(LogicalUnit):
14004 """Export an instance to an image in the cluster.
14007 HPATH = "instance-export"
14008 HTYPE = constants.HTYPE_INSTANCE
14011 def CheckArguments(self):
14012 """Check the arguments.
14015 self.x509_key_name = self.op.x509_key_name
14016 self.dest_x509_ca_pem = self.op.destination_x509_ca
14018 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14019 if not self.x509_key_name:
14020 raise errors.OpPrereqError("Missing X509 key name for encryption",
14021 errors.ECODE_INVAL)
14023 if not self.dest_x509_ca_pem:
14024 raise errors.OpPrereqError("Missing destination X509 CA",
14025 errors.ECODE_INVAL)
14027 def ExpandNames(self):
14028 self._ExpandAndLockInstance()
14030 # Lock all nodes for local exports
14031 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14032 # FIXME: lock only instance primary and destination node
14034 # Sad but true, for now we have to lock all nodes, as we don't know where
14035 # the previous export might be, and in this LU we search for it and
14036 # remove it from its current node. In the future we could fix this by:
14037 # - making a tasklet to search (share-lock all), then create the
14038 # new one, then one to remove, after
14039 # - removing the removal operation altogether
14040 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14042 def DeclareLocks(self, level):
14043 """Last minute lock declaration."""
14044 # All nodes are locked anyway, so nothing to do here.
14046 def BuildHooksEnv(self):
14047 """Build hooks env.
14049 This will run on the master, primary node and target node.
14053 "EXPORT_MODE": self.op.mode,
14054 "EXPORT_NODE": self.op.target_node,
14055 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14056 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14057 # TODO: Generic function for boolean env variables
14058 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14061 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14065 def BuildHooksNodes(self):
14066 """Build hooks nodes.
14069 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14071 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14072 nl.append(self.op.target_node)
14074 return (nl, nl)
14076 def CheckPrereq(self):
14077 """Check prerequisites.
14079 This checks that the instance and node names are valid.
14082 instance_name = self.op.instance_name
14084 self.instance = self.cfg.GetInstanceInfo(instance_name)
14085 assert self.instance is not None, \
14086 "Cannot retrieve locked instance %s" % self.op.instance_name
14087 _CheckNodeOnline(self, self.instance.primary_node)
14089 if (self.op.remove_instance and
14090 self.instance.admin_state == constants.ADMINST_UP and
14091 not self.op.shutdown):
14092 raise errors.OpPrereqError("Can not remove instance without shutting it"
14093 " down before", errors.ECODE_STATE)
14095 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14096 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14097 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14098 assert self.dst_node is not None
14100 _CheckNodeOnline(self, self.dst_node.name)
14101 _CheckNodeNotDrained(self, self.dst_node.name)
14104 self.dest_disk_info = None
14105 self.dest_x509_ca = None
14107 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14108 self.dst_node = None
14110 if len(self.op.target_node) != len(self.instance.disks):
14111 raise errors.OpPrereqError(("Received destination information for %s"
14112 " disks, but instance %s has %s disks") %
14113 (len(self.op.target_node), instance_name,
14114 len(self.instance.disks)),
14115 errors.ECODE_INVAL)
14117 cds = _GetClusterDomainSecret()
14119 # Check X509 key name
14121 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14122 except (TypeError, ValueError), err:
14123 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14124 errors.ECODE_INVAL)
14126 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14127 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14128 errors.ECODE_INVAL)
14130 # Load and verify CA
14132 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14133 except OpenSSL.crypto.Error, err:
14134 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14135 (err, ), errors.ECODE_INVAL)
14137 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14138 if errcode is not None:
14139 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14140 (msg, ), errors.ECODE_INVAL)
14142 self.dest_x509_ca = cert
14144 # Verify target information
14145 disk_info = []
14146 for idx, disk_data in enumerate(self.op.target_node):
14147 try:
14148 (host, port, magic) = \
14149 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14150 except errors.GenericError, err:
14151 raise errors.OpPrereqError("Target info for disk %s: %s" %
14152 (idx, err), errors.ECODE_INVAL)
14154 disk_info.append((host, port, magic))
14156 assert len(disk_info) == len(self.op.target_node)
14157 self.dest_disk_info = disk_info
14160 raise errors.ProgrammerError("Unhandled export mode %r" %
14163 # instance disk type verification
14164 # TODO: Implement export support for file-based disks
14165 for disk in self.instance.disks:
14166 if disk.dev_type == constants.LD_FILE:
14167 raise errors.OpPrereqError("Export not supported for instances with"
14168 " file-based disks", errors.ECODE_INVAL)
14170 def _CleanupExports(self, feedback_fn):
14171 """Removes exports of current instance from all other nodes.
14173 If an instance in a cluster with nodes A..D was exported to node C, its
14174 exports will be removed from the nodes A, B and D.
14177 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14179 nodelist = self.cfg.GetNodeList()
14180 nodelist.remove(self.dst_node.name)
14182 # on one-node clusters nodelist will be empty after the removal
14183 # if we proceed the backup would be removed because OpBackupQuery
14184 # substitutes an empty list with the full cluster node list.
14185 iname = self.instance.name
14187 feedback_fn("Removing old exports for instance %s" % iname)
14188 exportlist = self.rpc.call_export_list(nodelist)
14189 for node in exportlist:
14190 if exportlist[node].fail_msg:
14191 continue
14192 if iname in exportlist[node].payload:
14193 msg = self.rpc.call_export_remove(node, iname).fail_msg
14194 if msg:
14195 self.LogWarning("Could not remove older export for instance %s"
14196 " on node %s: %s", iname, node, msg)
14198 def Exec(self, feedback_fn):
14199 """Export an instance to an image in the cluster.
14202 assert self.op.mode in constants.EXPORT_MODES
14204 instance = self.instance
14205 src_node = instance.primary_node
14207 if self.op.shutdown:
14208 # shutdown the instance, but not the disks
14209 feedback_fn("Shutting down instance %s" % instance.name)
14210 result = self.rpc.call_instance_shutdown(src_node, instance,
14211 self.op.shutdown_timeout)
14212 # TODO: Maybe ignore failures if ignore_remove_failures is set
14213 result.Raise("Could not shutdown instance %s on"
14214 " node %s" % (instance.name, src_node))
14216 # set the disks ID correctly since call_instance_start needs the
14217 # correct drbd minor to create the symlinks
14218 for disk in instance.disks:
14219 self.cfg.SetDiskID(disk, src_node)
14221 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14223 if activate_disks:
14224 # Activate the instance disks if we're exporting a stopped instance
14225 feedback_fn("Activating disks for %s" % instance.name)
14226 _StartInstanceDisks(self, instance, None)
14228 try:
14229 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14230 instance)
14232 helper.CreateSnapshots()
14233 try:
14234 if (self.op.shutdown and
14235 instance.admin_state == constants.ADMINST_UP and
14236 not self.op.remove_instance):
14237 assert not activate_disks
14238 feedback_fn("Starting instance %s" % instance.name)
14239 result = self.rpc.call_instance_start(src_node,
14240 (instance, None, None), False)
14241 msg = result.fail_msg
14243 feedback_fn("Failed to start instance: %s" % msg)
14244 _ShutdownInstanceDisks(self, instance)
14245 raise errors.OpExecError("Could not start instance: %s" % msg)
14247 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14248 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14249 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14250 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14251 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14253 (key_name, _, _) = self.x509_key_name
14255 dest_ca_pem = \
14256 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14257 self.dest_x509_ca)
14259 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14260 key_name, dest_ca_pem,
14261 timeouts)
14262 finally:
14263 helper.Cleanup()
14265 # Check for backwards compatibility
14266 assert len(dresults) == len(instance.disks)
14267 assert compat.all(isinstance(i, bool) for i in dresults), \
14268 "Not all results are boolean: %r" % dresults
14272 feedback_fn("Deactivating disks for %s" % instance.name)
14273 _ShutdownInstanceDisks(self, instance)
14275 if not (compat.all(dresults) and fin_resu):
14276 failures = []
14277 if not fin_resu:
14278 failures.append("export finalization")
14279 if not compat.all(dresults):
14280 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14281 if not dsk)
14282 failures.append("disk export: disk(s) %s" % fdsk)
14284 raise errors.OpExecError("Export failed, errors in %s" %
14285 utils.CommaJoin(failures))
14287 # At this point, the export was successful, we can cleanup/finish
14289 # Remove instance if requested
14290 if self.op.remove_instance:
14291 feedback_fn("Removing instance %s" % instance.name)
14292 _RemoveInstance(self, feedback_fn, instance,
14293 self.op.ignore_remove_failures)
14295 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14296 self._CleanupExports(feedback_fn)
14298 return fin_resu, dresults
14301 class LUBackupRemove(NoHooksLU):
14302 """Remove exports related to the named instance.
14307 def ExpandNames(self):
14308 self.needed_locks = {}
14309 # We need all nodes to be locked in order for RemoveExport to work, but we
14310 # don't need to lock the instance itself, as nothing will happen to it (and
14311 # we can remove exports also for a removed instance)
14312 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14314 def Exec(self, feedback_fn):
14315 """Remove any export.
14318 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14319 # If the instance was not found we'll try with the name that was passed in.
14320 # This will only work if it was an FQDN, though.
14321 fqdn_warn = False
14322 if not instance_name:
14323 fqdn_warn = True
14324 instance_name = self.op.instance_name
14326 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14327 exportlist = self.rpc.call_export_list(locked_nodes)
14329 for node in exportlist:
14330 msg = exportlist[node].fail_msg
14331 if msg:
14332 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14333 continue
14334 if instance_name in exportlist[node].payload:
14335 found = True
14336 result = self.rpc.call_export_remove(node, instance_name)
14337 msg = result.fail_msg
14338 if msg:
14339 logging.error("Could not remove export for instance %s"
14340 " on node %s: %s", instance_name, node, msg)
14342 if fqdn_warn and not found:
14343 feedback_fn("Export not found. If trying to remove an export belonging"
14344 " to a deleted instance please use its Fully Qualified"
14348 class LUGroupAdd(LogicalUnit):
14349 """Logical unit for creating node groups.
14352 HPATH = "group-add"
14353 HTYPE = constants.HTYPE_GROUP
14356 def ExpandNames(self):
14357 # We need the new group's UUID here so that we can create and acquire the
14358 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14359 # that it should not check whether the UUID exists in the configuration.
14360 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14361 self.needed_locks = {}
14362 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14364 def CheckPrereq(self):
14365 """Check prerequisites.
14367 This checks that the given group name is not an existing node group
14368 already.
14371 try:
14372 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14373 except errors.OpPrereqError:
14374 pass
14375 else:
14376 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14377 " node group (UUID: %s)" %
14378 (self.op.group_name, existing_uuid),
14379 errors.ECODE_EXISTS)
14381 if self.op.ndparams:
14382 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14384 if self.op.hv_state:
14385 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14387 self.new_hv_state = None
14389 if self.op.disk_state:
14390 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14392 self.new_disk_state = None
14394 if self.op.diskparams:
14395 for templ in constants.DISK_TEMPLATES:
14396 if templ in self.op.diskparams:
14397 utils.ForceDictType(self.op.diskparams[templ],
14398 constants.DISK_DT_TYPES)
14399 self.new_diskparams = self.op.diskparams
14400 try:
14401 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14402 except errors.OpPrereqError, err:
14403 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14404 errors.ECODE_INVAL)
14406 self.new_diskparams = {}
14408 if self.op.ipolicy:
14409 cluster = self.cfg.GetClusterInfo()
14410 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14412 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14413 except errors.ConfigurationError, err:
14414 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14415 errors.ECODE_INVAL)
14417 def BuildHooksEnv(self):
14418 """Build hooks env.
14422 "GROUP_NAME": self.op.group_name,
14425 def BuildHooksNodes(self):
14426 """Build hooks nodes.
14429 mn = self.cfg.GetMasterNode()
14430 return ([mn], [mn])
14432 def Exec(self, feedback_fn):
14433 """Add the node group to the cluster.
14436 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14437 uuid=self.group_uuid,
14438 alloc_policy=self.op.alloc_policy,
14439 ndparams=self.op.ndparams,
14440 diskparams=self.new_diskparams,
14441 ipolicy=self.op.ipolicy,
14442 hv_state_static=self.new_hv_state,
14443 disk_state_static=self.new_disk_state)
14445 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14446 del self.remove_locks[locking.LEVEL_NODEGROUP]
14449 class LUGroupAssignNodes(NoHooksLU):
14450 """Logical unit for assigning nodes to groups.
14455 def ExpandNames(self):
14456 # These raise errors.OpPrereqError on their own:
14457 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14458 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14460 # We want to lock all the affected nodes and groups. We have readily
14461 # available the list of nodes, and the *destination* group. To gather the
14462 # list of "source" groups, we need to fetch node information later on.
14463 self.needed_locks = {
14464 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14465 locking.LEVEL_NODE: self.op.nodes,
14466 }
14468 def DeclareLocks(self, level):
14469 if level == locking.LEVEL_NODEGROUP:
14470 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14472 # Try to get all affected nodes' groups without having the group or node
14473 # lock yet. Needs verification later in the code flow.
14474 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14476 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14478 def CheckPrereq(self):
14479 """Check prerequisites.
14482 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14483 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14484 frozenset(self.op.nodes))
14486 expected_locks = (set([self.group_uuid]) |
14487 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14488 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14489 if actual_locks != expected_locks:
14490 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14491 " current groups are '%s', used to be '%s'" %
14492 (utils.CommaJoin(expected_locks),
14493 utils.CommaJoin(actual_locks)))
14495 self.node_data = self.cfg.GetAllNodesInfo()
14496 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14497 instance_data = self.cfg.GetAllInstancesInfo()
14499 if self.group is None:
14500 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14501 (self.op.group_name, self.group_uuid))
14503 (new_splits, previous_splits) = \
14504 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14505 for node in self.op.nodes],
14506 self.node_data, instance_data)
14508 if new_splits:
14509 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14511 if not self.op.force:
14512 raise errors.OpExecError("The following instances get split by this"
14513 " change and --force was not given: %s" %
14514 fmt_new_splits)
14515 else:
14516 self.LogWarning("This operation will split the following instances: %s",
14517 fmt_new_splits)
14519 if previous_splits:
14520 self.LogWarning("In addition, these already-split instances continue"
14521 " to be split across groups: %s",
14522 utils.CommaJoin(utils.NiceSort(previous_splits)))
14524 def Exec(self, feedback_fn):
14525 """Assign nodes to a new group.
14528 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14530 self.cfg.AssignGroupNodes(mods)
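# Example (illustrative sketch): node reassignment is requested with an
# OpGroupAssignNodes opcode; "force" corresponds to the split-instance
# check in CheckPrereq above. Values are hypothetical:
#
#   op = opcodes.OpGroupAssignNodes(group_name="rack1",
#                                   nodes=["node1.example.com"],
#                                   force=False)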
14532 @staticmethod
14533 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14534 """Check for split instances after a node assignment.
14536 This method considers a series of node assignments as an atomic operation,
14537 and returns information about split instances after applying the set of
14538 changes.
14540 In particular, it returns information about newly split instances, and
14541 instances that were already split, and remain so after the change.
14543 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14544 considered.
14546 @type changes: list of (node_name, new_group_uuid) pairs.
14547 @param changes: list of node assignments to consider.
14548 @param node_data: a dict with data for all nodes
14549 @param instance_data: a dict with all instances to consider
14550 @rtype: a two-tuple
14551 @return: a list of instances that were previously okay and result split as a
14552 consequence of this change, and a list of instances that were previously
14553 split and this change does not fix.
14556 changed_nodes = dict((node, group) for node, group in changes
14557 if node_data[node].group != group)
14559 all_split_instances = set()
14560 previously_split_instances = set()
14562 def InstanceNodes(instance):
14563 return [instance.primary_node] + list(instance.secondary_nodes)
14565 for inst in instance_data.values():
14566 if inst.disk_template not in constants.DTS_INT_MIRROR:
14567 continue
14569 instance_nodes = InstanceNodes(inst)
14571 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14572 previously_split_instances.add(inst.name)
14574 if len(set(changed_nodes.get(node, node_data[node].group)
14575 for node in instance_nodes)) > 1:
14576 all_split_instances.add(inst.name)
14578 return (list(all_split_instances - previously_split_instances),
14579 list(previously_split_instances & all_split_instances))
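# Worked example for CheckAssignmentForSplitInstances (hypothetical data):
# assume a DRBD instance with primary "n1" and secondary "n2", both in
# group "g1". The change list [("n2", "g2")] makes the instance's nodes
# span two groups, so it is reported in the first returned list (newly
# split). Had "n2" already belonged to "g2" before the change, the
# instance would instead appear in the second list (still split).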
14582 class _GroupQuery(_QueryBase):
14583 FIELDS = query.GROUP_FIELDS
14585 def ExpandNames(self, lu):
14586 lu.needed_locks = {}
14588 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14589 self._cluster = lu.cfg.GetClusterInfo()
14590 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14592 if not self.names:
14593 self.wanted = [name_to_uuid[name]
14594 for name in utils.NiceSort(name_to_uuid.keys())]
14595 else:
14596 # Accept names to be either names or UUIDs.
14597 missing = []
14598 self.wanted = []
14599 all_uuid = frozenset(self._all_groups.keys())
14601 for name in self.names:
14602 if name in all_uuid:
14603 self.wanted.append(name)
14604 elif name in name_to_uuid:
14605 self.wanted.append(name_to_uuid[name])
14606 else:
14607 missing.append(name)
14609 if missing:
14610 raise errors.OpPrereqError("Some groups do not exist: %s" %
14611 utils.CommaJoin(missing),
14612 errors.ECODE_NOENT)
14614 def DeclareLocks(self, lu, level):
14615 pass
14617 def _GetQueryData(self, lu):
14618 """Computes the list of node groups and their attributes.
14621 do_nodes = query.GQ_NODE in self.requested_data
14622 do_instances = query.GQ_INST in self.requested_data
14624 group_to_nodes = None
14625 group_to_instances = None
14627 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14628 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14629 # latter GetAllInstancesInfo() is not enough, for we have to go through
14630 # instance->node. Hence, we will need to process nodes even if we only need
14631 # instance information.
14632 if do_nodes or do_instances:
14633 all_nodes = lu.cfg.GetAllNodesInfo()
14634 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14635 node_to_group = {}
14637 for node in all_nodes.values():
14638 if node.group in group_to_nodes:
14639 group_to_nodes[node.group].append(node.name)
14640 node_to_group[node.name] = node.group
14642 if do_instances:
14643 all_instances = lu.cfg.GetAllInstancesInfo()
14644 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14646 for instance in all_instances.values():
14647 node = instance.primary_node
14648 if node in node_to_group:
14649 group_to_instances[node_to_group[node]].append(instance.name)
14651 if not do_nodes:
14652 # Do not pass on node information if it was not requested.
14653 group_to_nodes = None
14655 return query.GroupQueryData(self._cluster,
14656 [self._all_groups[uuid]
14657 for uuid in self.wanted],
14658 group_to_nodes, group_to_instances,
14659 query.GQ_DISKPARAMS in self.requested_data)
14662 class LUGroupQuery(NoHooksLU):
14663 """Logical unit for querying node groups.
14665 """
14666 REQ_BGL = False
14668 def CheckArguments(self):
14669 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14670 self.op.output_fields, False)
14672 def ExpandNames(self):
14673 self.gq.ExpandNames(self)
14675 def DeclareLocks(self, level):
14676 self.gq.DeclareLocks(self, level)
14678 def Exec(self, feedback_fn):
14679 return self.gq.OldStyleQuery(self)
14682 class LUGroupSetParams(LogicalUnit):
14683 """Modifies the parameters of a node group.
14686 HPATH = "group-modify"
14687 HTYPE = constants.HTYPE_GROUP
14688 REQ_BGL = False
14690 def CheckArguments(self):
14691 all_changes = [
14692 self.op.ndparams,
14693 self.op.diskparams,
14694 self.op.alloc_policy,
14695 self.op.hv_state,
14696 self.op.disk_state,
14697 self.op.ipolicy,
14698 ]
14700 if all_changes.count(None) == len(all_changes):
14701 raise errors.OpPrereqError("Please pass at least one modification",
14702 errors.ECODE_INVAL)
14704 def ExpandNames(self):
14705 # This raises errors.OpPrereqError on its own:
14706 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14708 self.needed_locks = {
14709 locking.LEVEL_INSTANCE: [],
14710 locking.LEVEL_NODEGROUP: [self.group_uuid],
14711 }
14713 self.share_locks[locking.LEVEL_INSTANCE] = 1
14715 def DeclareLocks(self, level):
14716 if level == locking.LEVEL_INSTANCE:
14717 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14719 # Lock instances optimistically, needs verification once group lock has
14720 # been acquired
14721 self.needed_locks[locking.LEVEL_INSTANCE] = \
14722 self.cfg.GetNodeGroupInstances(self.group_uuid)
14724 @staticmethod
14725 def _UpdateAndVerifyDiskParams(old, new):
14726 """Updates and verifies disk parameters.
14729 new_params = _GetUpdatedParams(old, new)
14730 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14731 return new_params
14733 def CheckPrereq(self):
14734 """Check prerequisites.
14737 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14739 # Check if locked instances are still correct
14740 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14742 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14743 cluster = self.cfg.GetClusterInfo()
14745 if self.group is None:
14746 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14747 (self.op.group_name, self.group_uuid))
14749 if self.op.ndparams:
14750 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14751 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14752 self.new_ndparams = new_ndparams
14754 if self.op.diskparams:
14755 diskparams = self.group.diskparams
14756 uavdp = self._UpdateAndVerifyDiskParams
14757 # For each disktemplate subdict update and verify the values
14758 new_diskparams = dict((dt,
14759 uavdp(diskparams.get(dt, {}),
14760 self.op.diskparams[dt]))
14761 for dt in constants.DISK_TEMPLATES
14762 if dt in self.op.diskparams)
14763 # Now that all the subdicts of diskparams are ready, merge the actual
14764 # dict with all updated subdicts
14765 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14766 try:
14767 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14768 except errors.OpPrereqError, err:
14769 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14770 errors.ECODE_INVAL)
14772 if self.op.hv_state:
14773 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14774 self.group.hv_state_static)
14776 if self.op.disk_state:
14777 self.new_disk_state = \
14778 _MergeAndVerifyDiskState(self.op.disk_state,
14779 self.group.disk_state_static)
14781 if self.op.ipolicy:
14782 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14783 self.op.ipolicy,
14784 group_policy=True)
14786 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14787 inst_filter = lambda inst: inst.name in owned_instances
14788 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14789 gmi = ganeti.masterd.instance
14790 violations = \
14791 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14792 self.group),
14793 new_ipolicy, instances)
14795 if violations:
14796 self.LogWarning("After the ipolicy change the following instances"
14797 " violate it: %s",
14798 utils.CommaJoin(violations))
14800 def BuildHooksEnv(self):
14801 """Build hooks env.
14803 """
14804 return {
14805 "GROUP_NAME": self.op.group_name,
14806 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14807 }
14809 def BuildHooksNodes(self):
14810 """Build hooks nodes.
14813 mn = self.cfg.GetMasterNode()
14814 return ([mn], [mn])
14816 def Exec(self, feedback_fn):
14817 """Modifies the node group.
14819 """
14820 result = []
14822 if self.op.ndparams:
14823 self.group.ndparams = self.new_ndparams
14824 result.append(("ndparams", str(self.group.ndparams)))
14826 if self.op.diskparams:
14827 self.group.diskparams = self.new_diskparams
14828 result.append(("diskparams", str(self.group.diskparams)))
14830 if self.op.alloc_policy:
14831 self.group.alloc_policy = self.op.alloc_policy
14833 if self.op.hv_state:
14834 self.group.hv_state_static = self.new_hv_state
14836 if self.op.disk_state:
14837 self.group.disk_state_static = self.new_disk_state
14839 if self.op.ipolicy:
14840 self.group.ipolicy = self.new_ipolicy
14842 self.cfg.Update(self.group, feedback_fn)
14843 return result
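# Example (illustrative sketch): the (name, new value) pairs returned
# above become the job result of an OpGroupSetParams submission, e.g.:
#
#   op = opcodes.OpGroupSetParams(group_name="rack1",
#                                 alloc_policy=constants.ALLOC_POLICY_LAST_RESORT)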
14846 class LUGroupRemove(LogicalUnit):
14847 HPATH = "group-remove"
14848 HTYPE = constants.HTYPE_GROUP
14849 REQ_BGL = False
14851 def ExpandNames(self):
14852 # This raises errors.OpPrereqError on its own:
14853 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14854 self.needed_locks = {
14855 locking.LEVEL_NODEGROUP: [self.group_uuid],
14856 }
14858 def CheckPrereq(self):
14859 """Check prerequisites.
14861 This checks that the given group name exists as a node group, that it is
14862 empty (i.e., contains no nodes), and that it is not the last group of the
14863 cluster.
14865 """
14866 # Verify that the group is empty.
14867 group_nodes = [node.name
14868 for node in self.cfg.GetAllNodesInfo().values()
14869 if node.group == self.group_uuid]
14871 if group_nodes:
14872 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14873 " nodes: %s" %
14874 (self.op.group_name,
14875 utils.CommaJoin(utils.NiceSort(group_nodes))),
14876 errors.ECODE_STATE)
14878 # Verify the cluster would not be left group-less.
14879 if len(self.cfg.GetNodeGroupList()) == 1:
14880 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14881 " removed" % self.op.group_name,
14882 errors.ECODE_STATE)
14884 def BuildHooksEnv(self):
14885 """Build hooks env.
14887 """
14888 return {
14889 "GROUP_NAME": self.op.group_name,
14890 }
14892 def BuildHooksNodes(self):
14893 """Build hooks nodes.
14896 mn = self.cfg.GetMasterNode()
14897 return ([mn], [mn])
14899 def Exec(self, feedback_fn):
14900 """Remove the node group.
14902 """
14903 try:
14904 self.cfg.RemoveNodeGroup(self.group_uuid)
14905 except errors.ConfigurationError:
14906 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14907 (self.op.group_name, self.group_uuid))
14909 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14912 class LUGroupRename(LogicalUnit):
14913 HPATH = "group-rename"
14914 HTYPE = constants.HTYPE_GROUP
14915 REQ_BGL = False
14917 def ExpandNames(self):
14918 # This raises errors.OpPrereqError on its own:
14919 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14921 self.needed_locks = {
14922 locking.LEVEL_NODEGROUP: [self.group_uuid],
14923 }
14925 def CheckPrereq(self):
14926 """Check prerequisites.
14928 Ensures requested new name is not yet used.
14930 """
14931 try:
14932 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14933 except errors.OpPrereqError:
14934 pass
14935 else:
14936 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14937 " node group (UUID: %s)" %
14938 (self.op.new_name, new_name_uuid),
14939 errors.ECODE_EXISTS)
14941 def BuildHooksEnv(self):
14942 """Build hooks env.
14944 """
14945 return {
14946 "OLD_NAME": self.op.group_name,
14947 "NEW_NAME": self.op.new_name,
14948 }
14950 def BuildHooksNodes(self):
14951 """Build hooks nodes.
14954 mn = self.cfg.GetMasterNode()
14956 all_nodes = self.cfg.GetAllNodesInfo()
14957 all_nodes.pop(mn, None)
14959 run_nodes = [mn]
14960 run_nodes.extend(node.name for node in all_nodes.values()
14961 if node.group == self.group_uuid)
14963 return (run_nodes, run_nodes)
14965 def Exec(self, feedback_fn):
14966 """Rename the node group.
14969 group = self.cfg.GetNodeGroup(self.group_uuid)
14971 if group is None:
14972 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14973 (self.op.group_name, self.group_uuid))
14975 group.name = self.op.new_name
14976 self.cfg.Update(group, feedback_fn)
14978 return self.op.new_name
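# Example (illustrative sketch): renaming happens via OpGroupRename; the
# value returned by Exec above (the new name) is the job result:
#
#   op = opcodes.OpGroupRename(group_name="rack1", new_name="rack-a")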
14981 class LUGroupEvacuate(LogicalUnit):
14982 HPATH = "group-evacuate"
14983 HTYPE = constants.HTYPE_GROUP
14984 REQ_BGL = False
14986 def ExpandNames(self):
14987 # This raises errors.OpPrereqError on its own:
14988 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14990 if self.op.target_groups:
14991 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14992 self.op.target_groups)
14993 else:
14994 self.req_target_uuids = []
14996 if self.group_uuid in self.req_target_uuids:
14997 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14998 " as a target group (targets are %s)" %
15000 utils.CommaJoin(self.req_target_uuids)),
15001 errors.ECODE_INVAL)
15003 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15005 self.share_locks = _ShareAll()
15006 self.needed_locks = {
15007 locking.LEVEL_INSTANCE: [],
15008 locking.LEVEL_NODEGROUP: [],
15009 locking.LEVEL_NODE: [],
15010 }
15012 def DeclareLocks(self, level):
15013 if level == locking.LEVEL_INSTANCE:
15014 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15016 # Lock instances optimistically, needs verification once node and group
15017 # locks have been acquired
15018 self.needed_locks[locking.LEVEL_INSTANCE] = \
15019 self.cfg.GetNodeGroupInstances(self.group_uuid)
15021 elif level == locking.LEVEL_NODEGROUP:
15022 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15024 if self.req_target_uuids:
15025 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15027 # Lock all groups used by instances optimistically; this requires going
15028 # via the node before it's locked, requiring verification later on
15029 lock_groups.update(group_uuid
15030 for instance_name in
15031 self.owned_locks(locking.LEVEL_INSTANCE)
15032 for group_uuid in
15033 self.cfg.GetInstanceNodeGroups(instance_name))
15034 else:
15035 # No target groups, need to lock all of them
15036 lock_groups = locking.ALL_SET
15038 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15040 elif level == locking.LEVEL_NODE:
15041 # This will only lock the nodes in the group to be evacuated which
15042 # contain actual instances
15043 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15044 self._LockInstancesNodes()
15046 # Lock all nodes in group to be evacuated and target groups
15047 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15048 assert self.group_uuid in owned_groups
15049 member_nodes = [node_name
15050 for group in owned_groups
15051 for node_name in self.cfg.GetNodeGroup(group).members]
15052 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15054 def CheckPrereq(self):
15055 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15056 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15057 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15059 assert owned_groups.issuperset(self.req_target_uuids)
15060 assert self.group_uuid in owned_groups
15062 # Check if locked instances are still correct
15063 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15065 # Get instance information
15066 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15068 # Check if node groups for locked instances are still correct
15069 _CheckInstancesNodeGroups(self.cfg, self.instances,
15070 owned_groups, owned_nodes, self.group_uuid)
15072 if self.req_target_uuids:
15073 # User requested specific target groups
15074 self.target_uuids = self.req_target_uuids
15075 else:
15076 # All groups except the one to be evacuated are potential targets
15077 self.target_uuids = [group_uuid for group_uuid in owned_groups
15078 if group_uuid != self.group_uuid]
15080 if not self.target_uuids:
15081 raise errors.OpPrereqError("There are no possible target groups",
15082 errors.ECODE_INVAL)
15084 def BuildHooksEnv(self):
15085 """Build hooks env.
15087 """
15088 return {
15089 "GROUP_NAME": self.op.group_name,
15090 "TARGET_GROUPS": " ".join(self.target_uuids),
15091 }
15093 def BuildHooksNodes(self):
15094 """Build hooks nodes.
15097 mn = self.cfg.GetMasterNode()
15099 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15101 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15103 return (run_nodes, run_nodes)
15105 def Exec(self, feedback_fn):
15106 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15108 assert self.group_uuid not in self.target_uuids
15110 req = iallocator.IAReqGroupChange(instances=instances,
15111 target_groups=self.target_uuids)
15112 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15114 ial.Run(self.op.iallocator)
15116 if not ial.success:
15117 raise errors.OpPrereqError("Can't compute group evacuation using"
15118 " iallocator '%s': %s" %
15119 (self.op.iallocator, ial.info),
15120 errors.ECODE_NORES)
15122 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15124 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15125 len(jobs), self.op.group_name)
15127 return ResultWithJobs(jobs)
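# Example (illustrative sketch): a group evacuation request; the LU
# returns a ResultWithJobs, and mcpu._ProcessResult submits the job lists
# computed by the iallocator. Values are hypothetical:
#
#   op = opcodes.OpGroupEvacuate(group_name="rack1",
#                                target_groups=["rack2"],
#                                early_release=False)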
15130 class TagsLU(NoHooksLU): # pylint: disable=W0223
15131 """Generic tags LU.
15133 This is an abstract class which is the parent of all the other tags LUs.
15136 def ExpandNames(self):
15137 self.group_uuid = None
15138 self.needed_locks = {}
15140 if self.op.kind == constants.TAG_NODE:
15141 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15142 lock_level = locking.LEVEL_NODE
15143 lock_name = self.op.name
15144 elif self.op.kind == constants.TAG_INSTANCE:
15145 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15146 lock_level = locking.LEVEL_INSTANCE
15147 lock_name = self.op.name
15148 elif self.op.kind == constants.TAG_NODEGROUP:
15149 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15150 lock_level = locking.LEVEL_NODEGROUP
15151 lock_name = self.group_uuid
15152 elif self.op.kind == constants.TAG_NETWORK:
15153 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15154 lock_level = locking.LEVEL_NETWORK
15155 lock_name = self.network_uuid
15156 else:
15157 lock_level = None
15158 lock_name = None
15160 if lock_level and getattr(self.op, "use_locking", True):
15161 self.needed_locks[lock_level] = lock_name
15163 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15164 # not possible to acquire the BGL based on opcode parameters)
15166 def CheckPrereq(self):
15167 """Check prerequisites.
15170 if self.op.kind == constants.TAG_CLUSTER:
15171 self.target = self.cfg.GetClusterInfo()
15172 elif self.op.kind == constants.TAG_NODE:
15173 self.target = self.cfg.GetNodeInfo(self.op.name)
15174 elif self.op.kind == constants.TAG_INSTANCE:
15175 self.target = self.cfg.GetInstanceInfo(self.op.name)
15176 elif self.op.kind == constants.TAG_NODEGROUP:
15177 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15178 elif self.op.kind == constants.TAG_NETWORK:
15179 self.target = self.cfg.GetNetwork(self.network_uuid)
15180 else:
15181 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15182 str(self.op.kind), errors.ECODE_INVAL)
15185 class LUTagsGet(TagsLU):
15186 """Returns the tags of a given object.
15188 """
15189 REQ_BGL = False
15191 def ExpandNames(self):
15192 TagsLU.ExpandNames(self)
15194 # Share locks as this is only a read operation
15195 self.share_locks = _ShareAll()
15197 def Exec(self, feedback_fn):
15198 """Returns the tag list.
15201 return list(self.target.GetTags())
15204 class LUTagsSearch(NoHooksLU):
15205 """Searches the tags for a given pattern.
15207 """
15208 REQ_BGL = False
15210 def ExpandNames(self):
15211 self.needed_locks = {}
15213 def CheckPrereq(self):
15214 """Check prerequisites.
15216 This checks the pattern passed for validity by compiling it.
15218 """
15219 try:
15220 self.re = re.compile(self.op.pattern)
15221 except re.error, err:
15222 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15223 (self.op.pattern, err), errors.ECODE_INVAL)
15225 def Exec(self, feedback_fn):
15226 """Returns the tag list.
15228 """
15229 cfg = self.cfg
15230 tgts = [("/cluster", cfg.GetClusterInfo())]
15231 ilist = cfg.GetAllInstancesInfo().values()
15232 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15233 nlist = cfg.GetAllNodesInfo().values()
15234 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15235 tgts.extend(("/nodegroup/%s" % n.name, n)
15236 for n in cfg.GetAllNodeGroupsInfo().values())
15237 results = []
15238 for path, target in tgts:
15239 for tag in target.GetTags():
15240 if self.re.search(tag):
15241 results.append((path, tag))
15242 return results
15245 class LUTagsSet(TagsLU):
15246 """Sets a tag on a given object.
15248 """
15249 REQ_BGL = False
15251 def CheckPrereq(self):
15252 """Check prerequisites.
15254 This checks the type and length of the tag name and value.
15257 TagsLU.CheckPrereq(self)
15258 for tag in self.op.tags:
15259 objects.TaggableObject.ValidateTag(tag)
15261 def Exec(self, feedback_fn):
15262 """Sets the tag.
15264 """
15265 try:
15266 for tag in self.op.tags:
15267 self.target.AddTag(tag)
15268 except errors.TagError, err:
15269 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15270 self.cfg.Update(self.target, feedback_fn)
15273 class LUTagsDel(TagsLU):
15274 """Delete a list of tags from a given object.
15276 """
15277 REQ_BGL = False
15279 def CheckPrereq(self):
15280 """Check prerequisites.
15282 This checks that we have the given tag.
15285 TagsLU.CheckPrereq(self)
15286 for tag in self.op.tags:
15287 objects.TaggableObject.ValidateTag(tag)
15288 del_tags = frozenset(self.op.tags)
15289 cur_tags = self.target.GetTags()
15291 diff_tags = del_tags - cur_tags
15292 if diff_tags:
15293 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15294 raise errors.OpPrereqError("Tag(s) %s not found" %
15295 (utils.CommaJoin(diff_names), ),
15296 errors.ECODE_NOENT)
15298 def Exec(self, feedback_fn):
15299 """Remove the tag from the object.
15302 for tag in self.op.tags:
15303 self.target.RemoveTag(tag)
15304 self.cfg.Update(self.target, feedback_fn)
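# Example (illustrative sketch): the tag LUs are driven by the OpTags*
# opcodes; adding and then removing an instance tag might look like:
#
#   add_op = opcodes.OpTagsSet(kind=constants.TAG_INSTANCE,
#                              name="instance1.example.com",
#                              tags=["env:prod"])
#   del_op = opcodes.OpTagsDel(kind=constants.TAG_INSTANCE,
#                              name="instance1.example.com",
#                              tags=["env:prod"])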
15307 class LUTestDelay(NoHooksLU):
15308 """Sleep for a specified amount of time.
15310 This LU sleeps on the master and/or nodes for a specified amount of
15311 time.
15313 """
15314 REQ_BGL = False
15316 def ExpandNames(self):
15317 """Expand names and set required locks.
15319 This expands the node list, if any.
15322 self.needed_locks = {}
15323 if self.op.on_nodes:
15324 # _GetWantedNodes can be used here, but is not always appropriate to use
15325 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15326 # more information.
15327 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15328 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15330 def _TestDelay(self):
15331 """Do the actual sleep.
15334 if self.op.on_master:
15335 if not utils.TestDelay(self.op.duration):
15336 raise errors.OpExecError("Error during master delay test")
15337 if self.op.on_nodes:
15338 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15339 for node, node_result in result.items():
15340 node_result.Raise("Failure during rpc call to node %s" % node)
15342 def Exec(self, feedback_fn):
15343 """Execute the test delay opcode, with the wanted repetitions.
15345 """
15346 if self.op.repeat == 0:
15347 self._TestDelay()
15348 else:
15349 top_value = self.op.repeat - 1
15350 for i in range(self.op.repeat):
15351 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15352 self._TestDelay()
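# Example (illustrative sketch): a five-second delay on the master,
# repeated twice:
#
#   op = opcodes.OpTestDelay(duration=5.0, on_master=True,
#                            on_nodes=[], repeat=2)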
15355 class LURestrictedCommand(NoHooksLU):
15356 """Logical unit for executing restricted commands.
15358 """
15359 REQ_BGL = False
15361 def ExpandNames(self):
15362 if self.op.nodes:
15363 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15365 self.needed_locks = {
15366 locking.LEVEL_NODE: self.op.nodes,
15367 }
15368 self.share_locks = {
15369 locking.LEVEL_NODE: not self.op.use_locking,
15370 }
15372 def CheckPrereq(self):
15373 """Check prerequisites.
15377 def Exec(self, feedback_fn):
15378 """Execute restricted command and return output.
15381 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15383 # Check if correct locks are held
15384 assert set(self.op.nodes).issubset(owned_nodes)
15386 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15388 result = []
15390 for node_name in self.op.nodes:
15391 nres = rpcres[node_name]
15392 if nres.fail_msg:
15393 msg = ("Command '%s' on node '%s' failed: %s" %
15394 (self.op.command, node_name, nres.fail_msg))
15395 result.append((False, msg))
15396 else:
15397 result.append((True, nres.payload))
15399 return result
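# Example (illustrative sketch): the per-node (success, output-or-error)
# pairs built above are the result of an OpRestrictedCommand submission;
# the command itself must be present in the node-side whitelist of
# restricted commands. Values are hypothetical:
#
#   op = opcodes.OpRestrictedCommand(command="collect-diagnostics",
#                                    nodes=["node1.example.com"],
#                                    use_locking=False)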
15402 class LUTestJqueue(NoHooksLU):
15403 """Utility LU to test some aspects of the job queue.
15405 """
15406 REQ_BGL = False
15408 # Must be lower than default timeout for WaitForJobChange to see whether it
15409 # notices changed jobs
15410 _CLIENT_CONNECT_TIMEOUT = 20.0
15411 _CLIENT_CONFIRM_TIMEOUT = 60.0
15413 @classmethod
15414 def _NotifyUsingSocket(cls, cb, errcls):
15415 """Opens a Unix socket and waits for another program to connect.
15417 @type cb: callable
15418 @param cb: Callback to send socket name to client
15419 @type errcls: class
15420 @param errcls: Exception class to use for errors
15423 # Using a temporary directory as there's no easy way to create temporary
15424 # sockets without writing a custom loop around tempfile.mktemp and
15425 # socket.bind
15426 tmpdir = tempfile.mkdtemp()
15427 try:
15428 tmpsock = utils.PathJoin(tmpdir, "sock")
15430 logging.debug("Creating temporary socket at %s", tmpsock)
15431 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15432 try:
15433 sock.bind(tmpsock)
15434 sock.listen(1)
15436 # Send details to client
15437 cb(tmpsock)
15439 # Wait for client to connect before continuing
15440 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15441 try:
15442 (conn, _) = sock.accept()
15443 except socket.error, err:
15444 raise errcls("Client didn't connect in time (%s)" % err)
15445 finally:
15446 sock.close()
15447 finally:
15448 # Remove as soon as client is connected
15449 shutil.rmtree(tmpdir)
15451 # Wait for client to close
15452 try:
15453 try:
15454 # pylint: disable=E1101
15455 # Instance of '_socketobject' has no ... member
15456 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15457 conn.recv(1)
15458 except socket.error, err:
15459 raise errcls("Client failed to confirm notification (%s)" % err)
15460 finally:
15461 conn.close()
15463 def _SendNotification(self, test, arg, sockname):
15464 """Sends a notification to the client.
15466 @type test: string
15467 @param test: Test name
15468 @param arg: Test argument (depends on test)
15469 @type sockname: string
15470 @param sockname: Socket path
15473 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15475 def _Notify(self, prereq, test, arg):
15476 """Notifies the client of a test.
15478 @type prereq: bool
15479 @param prereq: Whether this is a prereq-phase test
15480 @type test: string
15481 @param test: Test name
15482 @param arg: Test argument (depends on test)
15484 """
15485 if prereq:
15486 errcls = errors.OpPrereqError
15487 else:
15488 errcls = errors.OpExecError
15490 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15491 test, arg),
15492 errcls)
15494 def CheckArguments(self):
15495 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15496 self.expandnames_calls = 0
15498 def ExpandNames(self):
15499 checkargs_calls = getattr(self, "checkargs_calls", 0)
15500 if checkargs_calls < 1:
15501 raise errors.ProgrammerError("CheckArguments was not called")
15503 self.expandnames_calls += 1
15505 if self.op.notify_waitlock:
15506 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15508 self.LogInfo("Expanding names")
15510 # Get lock on master node (just to get a lock, not for a particular reason)
15511 self.needed_locks = {
15512 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15513 }
15515 def Exec(self, feedback_fn):
15516 if self.expandnames_calls < 1:
15517 raise errors.ProgrammerError("ExpandNames was not called")
15519 if self.op.notify_exec:
15520 self._Notify(False, constants.JQT_EXEC, None)
15522 self.LogInfo("Executing")
15524 if self.op.log_messages:
15525 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15526 for idx, msg in enumerate(self.op.log_messages):
15527 self.LogInfo("Sending log message %s", idx + 1)
15528 feedback_fn(constants.JQT_MSGPREFIX + msg)
15529 # Report how many test messages have been sent
15530 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15532 if self.op.fail:
15533 raise errors.OpExecError("Opcode failure was requested")
15535 return True
15538 class LUTestAllocator(NoHooksLU):
15539 """Run allocator tests.
15541 This LU runs the allocator tests.
15543 """
15544 def CheckPrereq(self):
15545 """Check prerequisites.
15547 This checks the opcode parameters depending on the direction and mode of the test.
15549 """
15550 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15551 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15552 for attr in ["memory", "disks", "disk_template",
15553 "os", "tags", "nics", "vcpus"]:
15554 if not hasattr(self.op, attr):
15555 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15556 attr, errors.ECODE_INVAL)
15557 iname = self.cfg.ExpandInstanceName(self.op.name)
15558 if iname is not None:
15559 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15560 iname, errors.ECODE_EXISTS)
15561 if not isinstance(self.op.nics, list):
15562 raise errors.OpPrereqError("Invalid parameter 'nics'",
15563 errors.ECODE_INVAL)
15564 if not isinstance(self.op.disks, list):
15565 raise errors.OpPrereqError("Invalid parameter 'disks'",
15566 errors.ECODE_INVAL)
15567 for row in self.op.disks:
15568 if (not isinstance(row, dict) or
15569 constants.IDISK_SIZE not in row or
15570 not isinstance(row[constants.IDISK_SIZE], int) or
15571 constants.IDISK_MODE not in row or
15572 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15573 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15574 " parameter", errors.ECODE_INVAL)
15575 if self.op.hypervisor is None:
15576 self.op.hypervisor = self.cfg.GetHypervisorType()
15577 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15578 fname = _ExpandInstanceName(self.cfg, self.op.name)
15579 self.op.name = fname
15580 self.relocate_from = \
15581 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15582 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15583 constants.IALLOCATOR_MODE_NODE_EVAC):
15584 if not self.op.instances:
15585 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15586 self.op.instances = _GetWantedInstances(self, self.op.instances)
15587 else:
15588 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15589 self.op.mode, errors.ECODE_INVAL)
15591 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15592 if self.op.iallocator is None:
15593 raise errors.OpPrereqError("Missing allocator name",
15594 errors.ECODE_INVAL)
15595 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15596 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15597 self.op.direction, errors.ECODE_INVAL)
15599 def Exec(self, feedback_fn):
15600 """Run the allocator test.
15603 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15604 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15605 memory=self.op.memory,
15606 disks=self.op.disks,
15607 disk_template=self.op.disk_template,
15608 os=self.op.os,
15609 tags=self.op.tags,
15610 nics=self.op.nics,
15611 vcpus=self.op.vcpus,
15612 spindle_use=self.op.spindle_use,
15613 hypervisor=self.op.hypervisor)
15614 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15615 req = iallocator.IAReqRelocate(name=self.op.name,
15616 relocate_from=list(self.relocate_from))
15617 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15618 req = iallocator.IAReqGroupChange(instances=self.op.instances,
15619 target_groups=self.op.target_groups)
15620 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15621 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15622 evac_mode=self.op.evac_mode)
15623 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15624 disk_template = self.op.disk_template
15625 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15626 memory=self.op.memory,
15627 disks=self.op.disks,
15628 disk_template=disk_template,
15629 os=self.op.os,
15630 tags=self.op.tags,
15631 nics=self.op.nics,
15632 vcpus=self.op.vcpus,
15633 spindle_use=self.op.spindle_use,
15634 hypervisor=self.op.hypervisor)
15635 for idx in range(self.op.count)]
15636 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15637 else:
15638 raise errors.ProgrammerError("Uncaught mode %s in"
15639 " LUTestAllocator.Exec", self.op.mode)
15641 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15642 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15643 result = ial.in_text
15644 else:
15645 ial.Run(self.op.iallocator, validate=False)
15646 result = ial.out_text
15647 return result
15650 class LUNetworkAdd(LogicalUnit):
15651 """Logical unit for creating networks.
15654 HPATH = "network-add"
15655 HTYPE = constants.HTYPE_NETWORK
15656 REQ_BGL = False
15658 def BuildHooksNodes(self):
15659 """Build hooks nodes.
15662 mn = self.cfg.GetMasterNode()
15663 return ([mn], [mn])
15665 def ExpandNames(self):
15666 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15668 if self.op.conflicts_check:
15669 self.share_locks[locking.LEVEL_NODE] = 1
15670 self.needed_locks = {
15671 locking.LEVEL_NODE: locking.ALL_SET,
15672 }
15673 else:
15674 self.needed_locks = {}
15676 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15678 def CheckPrereq(self):
15679 """Check prerequisites.
15681 This checks that the given group name is not an existing node group
15685 if self.op.network is None:
15686 raise errors.OpPrereqError("Network must be given",
15687 errors.ECODE_INVAL)
15689 uuid = self.cfg.LookupNetwork(self.op.network_name)
15691 if uuid:
15692 raise errors.OpPrereqError("Network '%s' already defined" %
15693 self.op.network, errors.ECODE_EXISTS)
15695 if self.op.mac_prefix:
15696 utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15698 # Check tag validity
15699 for tag in self.op.tags:
15700 objects.TaggableObject.ValidateTag(tag)
15702 def BuildHooksEnv(self):
15703 """Build hooks env.
15705 """
15706 args = {
15707 "name": self.op.network_name,
15708 "subnet": self.op.network,
15709 "gateway": self.op.gateway,
15710 "network6": self.op.network6,
15711 "gateway6": self.op.gateway6,
15712 "mac_prefix": self.op.mac_prefix,
15713 "network_type": self.op.network_type,
15714 "tags": self.op.tags,
15715 }
15716 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15718 def Exec(self, feedback_fn):
15719 """Add the IP pool to the cluster.
15721 """
15722 nobj = objects.Network(name=self.op.network_name,
15723 network=self.op.network,
15724 gateway=self.op.gateway,
15725 network6=self.op.network6,
15726 gateway6=self.op.gateway6,
15727 mac_prefix=self.op.mac_prefix,
15728 network_type=self.op.network_type,
15729 uuid=self.network_uuid,
15730 family=constants.IP4_VERSION)
15731 # Initialize the associated address pool
15732 try:
15733 pool = network.AddressPool.InitializeNetwork(nobj)
15734 except errors.AddressPoolError, e:
15735 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15737 # Check if we need to reserve the nodes and the cluster master IP
15738 # These may not be allocated to any instances in routed mode, as
15739 # they wouldn't function anyway.
15740 if self.op.conflicts_check:
15741 for node in self.cfg.GetAllNodesInfo().values():
15742 for ip in [node.primary_ip, node.secondary_ip]:
15743 try:
15744 if pool.Contains(ip):
15745 pool.Reserve(ip)
15746 self.LogInfo("Reserved IP address of node '%s' (%s)",
15747 node.name, ip)
15748 except errors.AddressPoolError:
15749 self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
15750 node.name, ip)
15752 master_ip = self.cfg.GetClusterInfo().master_ip
15753 try:
15754 if pool.Contains(master_ip):
15755 pool.Reserve(master_ip)
15756 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
15757 except errors.AddressPoolError:
15758 self.LogWarning("Cannot reserve cluster master IP address (%s)",
15759 master_ip)
15761 if self.op.add_reserved_ips:
15762 for ip in self.op.add_reserved_ips:
15763 try:
15764 pool.Reserve(ip, external=True)
15765 except errors.AddressPoolError, e:
15766 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15769 for tag in self.op.tags:
15770 nobj.AddTag(tag)
15772 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15773 del self.remove_locks[locking.LEVEL_NETWORK]
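# Example (illustrative sketch): defining a network together with an
# externally reserved address; all values are hypothetical:
#
#   op = opcodes.OpNetworkAdd(network_name="net1",
#                             network="192.0.2.0/24",
#                             gateway="192.0.2.1",
#                             add_reserved_ips=["192.0.2.10"],
#                             tags=[],
#                             conflicts_check=True)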
15776 class LUNetworkRemove(LogicalUnit):
15777 HPATH = "network-remove"
15778 HTYPE = constants.HTYPE_NETWORK
15779 REQ_BGL = False
15781 def ExpandNames(self):
15782 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15784 if not self.network_uuid:
15785 raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
15786 errors.ECODE_INVAL)
15788 self.share_locks[locking.LEVEL_NODEGROUP] = 1
15789 self.needed_locks = {
15790 locking.LEVEL_NETWORK: [self.network_uuid],
15791 locking.LEVEL_NODEGROUP: locking.ALL_SET,
15792 }
15794 def CheckPrereq(self):
15795 """Check prerequisites.
15797 This checks that the given network name exists as a network and is not
15798 connected to (i.e., in use by) any node group.
15801 """
15803 # Verify that the network is not connected.
15804 node_groups = [group.name
15805 for group in self.cfg.GetAllNodeGroupsInfo().values()
15806 for net in group.networks.keys()
15807 if net == self.network_uuid]
15809 if node_groups:
15810 self.LogWarning("Network '%s' is connected to the following"
15811 " node groups: %s" % (self.op.network_name,
15812 utils.CommaJoin(utils.NiceSort(node_groups))))
15813 raise errors.OpPrereqError("Network still connected",
15814 errors.ECODE_STATE)
15816 def BuildHooksEnv(self):
15817 """Build hooks env.
15819 """
15820 return {
15821 "NETWORK_NAME": self.op.network_name,
15822 }
15824 def BuildHooksNodes(self):
15825 """Build hooks nodes.
15828 mn = self.cfg.GetMasterNode()
15829 return ([mn], [mn])
15831 def Exec(self, feedback_fn):
15832 """Remove the network.
15834 """
15835 try:
15836 self.cfg.RemoveNetwork(self.network_uuid)
15837 except errors.ConfigurationError:
15838 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15839 (self.op.network_name, self.network_uuid))
15842 class LUNetworkSetParams(LogicalUnit):
15843 """Modifies the parameters of a network.
15846 HPATH = "network-modify"
15847 HTYPE = constants.HTYPE_NETWORK
15848 REQ_BGL = False
15850 def CheckArguments(self):
15851 if (self.op.gateway and
15852 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15853 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15854 " at once", errors.ECODE_INVAL)
15856 def ExpandNames(self):
15857 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15858 self.network = self.cfg.GetNetwork(self.network_uuid)
15859 if self.network is None:
15860 raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
15861 (self.op.network_name, self.network_uuid),
15862 errors.ECODE_INVAL)
15863 self.needed_locks = {
15864 locking.LEVEL_NETWORK: [self.network_uuid],
15865 }
15867 def CheckPrereq(self):
15868 """Check prerequisites.
15871 self.gateway = self.network.gateway
15872 self.network_type = self.network.network_type
15873 self.mac_prefix = self.network.mac_prefix
15874 self.network6 = self.network.network6
15875 self.gateway6 = self.network.gateway6
15876 self.tags = self.network.tags
15878 self.pool = network.AddressPool(self.network)
15880 if self.op.gateway:
15881 if self.op.gateway == constants.VALUE_NONE:
15882 self.gateway = None
15883 else:
15884 self.gateway = self.op.gateway
15885 if self.pool.IsReserved(self.gateway):
15886 raise errors.OpPrereqError("%s is already reserved" %
15887 self.gateway, errors.ECODE_INVAL)
15889 if self.op.network_type:
15890 if self.op.network_type == constants.VALUE_NONE:
15891 self.network_type = None
15892 else:
15893 self.network_type = self.op.network_type
15895 if self.op.mac_prefix:
15896 if self.op.mac_prefix == constants.VALUE_NONE:
15897 self.mac_prefix = None
15899 utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15900 self.mac_prefix = self.op.mac_prefix
15902 if self.op.gateway6:
15903 if self.op.gateway6 == constants.VALUE_NONE:
15904 self.gateway6 = None
15905 else:
15906 self.gateway6 = self.op.gateway6
15908 if self.op.network6:
15909 if self.op.network6 == constants.VALUE_NONE:
15910 self.network6 = None
15911 else:
15912 self.network6 = self.op.network6
15914 def BuildHooksEnv(self):
15915 """Build hooks env.
15917 """
15918 args = {
15919 "name": self.op.network_name,
15920 "subnet": self.network.network,
15921 "gateway": self.gateway,
15922 "network6": self.network6,
15923 "gateway6": self.gateway6,
15924 "mac_prefix": self.mac_prefix,
15925 "network_type": self.network_type,
15926 "tags": self.tags,
15927 }
15928 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15930 def BuildHooksNodes(self):
15931 """Build hooks nodes.
15934 mn = self.cfg.GetMasterNode()
15935 return ([mn], [mn])
15937 def Exec(self, feedback_fn):
15938 """Modifies the network.
15941 #TODO: reserve/release via temporary reservation manager
15942 # extend cfg.ReserveIp/ReleaseIp with the external flag
15943 if self.op.gateway:
15944 if self.gateway == self.network.gateway:
15945 self.LogWarning("Gateway is already %s", self.gateway)
15946 else:
15947 if self.gateway:
15948 self.pool.Reserve(self.gateway, external=True)
15949 if self.network.gateway:
15950 self.pool.Release(self.network.gateway, external=True)
15951 self.network.gateway = self.gateway
15953 if self.op.add_reserved_ips:
15954 for ip in self.op.add_reserved_ips:
15955 try:
15956 if self.pool.IsReserved(ip):
15957 self.LogWarning("IP address %s is already reserved", ip)
15958 else:
15959 self.pool.Reserve(ip, external=True)
15960 except errors.AddressPoolError, err:
15961 self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
15963 if self.op.remove_reserved_ips:
15964 for ip in self.op.remove_reserved_ips:
15965 if ip == self.network.gateway:
15966 self.LogWarning("Cannot unreserve Gateway's IP")
15967 continue
15968 try:
15969 if not self.pool.IsReserved(ip):
15970 self.LogWarning("IP address %s is already unreserved", ip)
15971 else:
15972 self.pool.Release(ip, external=True)
15973 except errors.AddressPoolError, err:
15974 self.LogWarning("Cannot release IP address %s: %s", ip, err)
15976 if self.op.mac_prefix:
15977 self.network.mac_prefix = self.mac_prefix
15979 if self.op.network6:
15980 self.network.network6 = self.network6
15982 if self.op.gateway6:
15983 self.network.gateway6 = self.gateway6
15985 if self.op.network_type:
15986 self.network.network_type = self.network_type
15988 self.pool.Validate()
15990 self.cfg.Update(self.network, feedback_fn)
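# Example (illustrative sketch): swapping one externally reserved address
# for another in a single modification. Note that CheckArguments above
# rejects combining a gateway change with reserved-IP changes:
#
#   op = opcodes.OpNetworkSetParams(network_name="net1",
#                                   add_reserved_ips=["192.0.2.20"],
#                                   remove_reserved_ips=["192.0.2.10"])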
15993 class _NetworkQuery(_QueryBase):
15994 FIELDS = query.NETWORK_FIELDS
15996 def ExpandNames(self, lu):
15997 lu.needed_locks = {}
15999 self._all_networks = lu.cfg.GetAllNetworksInfo()
16000 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
16002 if not self.names:
16003 self.wanted = [name_to_uuid[name]
16004 for name in utils.NiceSort(name_to_uuid.keys())]
16005 else:
16006 # Accept names to be either names or UUIDs.
16007 missing = []
16008 self.wanted = []
16009 all_uuid = frozenset(self._all_networks.keys())
16011 for name in self.names:
16012 if name in all_uuid:
16013 self.wanted.append(name)
16014 elif name in name_to_uuid:
16015 self.wanted.append(name_to_uuid[name])
16016 else:
16017 missing.append(name)
16019 if missing:
16020 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16021 errors.ECODE_NOENT)
16023 def DeclareLocks(self, lu, level):
16024 pass
16026 def _GetQueryData(self, lu):
16027 """Computes the list of networks and their attributes.
16030 do_instances = query.NETQ_INST in self.requested_data
16031 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
16032 do_stats = query.NETQ_STATS in self.requested_data
16034 network_to_groups = None
16035 network_to_instances = None
16036 stats = None
16038 # For NETQ_GROUP, we need to map network->[groups]
16039 if do_groups:
16040 all_groups = lu.cfg.GetAllNodeGroupsInfo()
16041 network_to_groups = dict((uuid, []) for uuid in self.wanted)
16043 if do_instances:
16044 all_instances = lu.cfg.GetAllInstancesInfo()
16045 all_nodes = lu.cfg.GetAllNodesInfo()
16046 network_to_instances = dict((uuid, []) for uuid in self.wanted)
16048 for group in all_groups.values():
16049 if do_instances:
16050 group_nodes = [node.name for node in all_nodes.values() if
16051 node.group == group.uuid]
16052 group_instances = [instance for instance in all_instances.values()
16053 if instance.primary_node in group_nodes]
16055 for net_uuid in group.networks.keys():
16056 if net_uuid in network_to_groups:
16057 netparams = group.networks[net_uuid]
16058 mode = netparams[constants.NIC_MODE]
16059 link = netparams[constants.NIC_LINK]
16060 info = group.name + "(" + mode + ", " + link + ")"
16061 network_to_groups[net_uuid].append(info)
16063 if do_instances:
16064 for instance in group_instances:
16065 for nic in instance.nics:
16066 if nic.network == self._all_networks[net_uuid].name:
16067 network_to_instances[net_uuid].append(instance.name)
16070 if do_stats:
16071 stats = {}
16072 for uuid, net in self._all_networks.items():
16073 if uuid in self.wanted:
16074 pool = network.AddressPool(net)
16075 stats[uuid] = {
16076 "free_count": pool.GetFreeCount(),
16077 "reserved_count": pool.GetReservedCount(),
16078 "map": pool.GetMap(),
16079 "external_reservations":
16080 utils.CommaJoin(pool.GetExternalReservations()),
16081 }
16083 return query.NetworkQueryData([self._all_networks[uuid]
16084 for uuid in self.wanted],
16085 network_to_groups,
16086 network_to_instances,
16087 stats)
16090 class LUNetworkQuery(NoHooksLU):
16091 """Logical unit for querying networks.
16093 """
16094 REQ_BGL = False
16096 def CheckArguments(self):
16097 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16098 self.op.output_fields, False)
16100 def ExpandNames(self):
16101 self.nq.ExpandNames(self)
16103 def Exec(self, feedback_fn):
16104 return self.nq.OldStyleQuery(self)
16107 class LUNetworkConnect(LogicalUnit):
16108 """Connect a network to a nodegroup
16111 HPATH = "network-connect"
16112 HTYPE = constants.HTYPE_NETWORK
16113 REQ_BGL = False
16115 def ExpandNames(self):
16116 self.network_name = self.op.network_name
16117 self.group_name = self.op.group_name
16118 self.network_mode = self.op.network_mode
16119 self.network_link = self.op.network_link
16121 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16122 self.network = self.cfg.GetNetwork(self.network_uuid)
16123 if self.network is None:
16124 raise errors.OpPrereqError("Network %s does not exist" %
16125 self.network_name, errors.ECODE_INVAL)
16127 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16128 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16129 if self.group is None:
16130 raise errors.OpPrereqError("Group %s does not exist" %
16131 self.group_name, errors.ECODE_INVAL)
16133 self.share_locks[locking.LEVEL_INSTANCE] = 1
16134 self.needed_locks = {
16135 locking.LEVEL_INSTANCE: [],
16136 locking.LEVEL_NODEGROUP: [self.group_uuid],
16137 }
16139 def DeclareLocks(self, level):
16140 if level == locking.LEVEL_INSTANCE:
16141 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16143 # Lock instances optimistically, needs verification once group lock has
16144 # been acquired
16145 if self.op.conflicts_check:
16146 self.needed_locks[locking.LEVEL_INSTANCE] = \
16147 self.cfg.GetNodeGroupInstances(self.group_uuid)
16148 self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
16150 def BuildHooksEnv(self):
16151 ret = {
16152 "GROUP_NAME": self.group_name,
16153 "GROUP_NETWORK_MODE": self.network_mode,
16154 "GROUP_NETWORK_LINK": self.network_link,
16155 }
16156 ret.update(_BuildNetworkHookEnvByObject(self.network))
16157 return ret
16159 def BuildHooksNodes(self):
16160 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16161 return (nodes, nodes)
16163 def CheckPrereq(self):
16164 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16165 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16167 assert self.group_uuid in owned_groups
16169 # Check if locked instances are still correct
16170 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16172 l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
16173 for i in value)
16175 self.netparams = {
16176 constants.NIC_MODE: self.network_mode,
16177 constants.NIC_LINK: self.network_link,
16178 }
16179 objects.NIC.CheckParameterSyntax(self.netparams)
16181 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16182 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16183 self.connected = False
16184 if self.network_uuid in self.group.networks:
16185 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16186 (self.network_name, self.group.name))
16187 self.connected = True
16188 return
16190 if self.op.conflicts_check:
16191 pool = network.AddressPool(self.network)
16192 conflicting_instances = []
16194 for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
16195 for idx, nic in enumerate(instance.nics):
16196 if pool.Contains(nic.ip):
16197 conflicting_instances.append((instance.name, idx, nic.ip))
16199 if conflicting_instances:
16200 self.LogWarning("The following occurrences use IPs from network %s"
16201 " that is about to be connected to node group %s: %s" %
16202 (self.network_name, self.group.name,
16203 l(conflicting_instances)))
16204 raise errors.OpPrereqError("Conflicting IPs found."
16205 " Please remove/modify"
16206 " corresponding NICs",
16207 errors.ECODE_INVAL)
16209 def Exec(self, feedback_fn):
16210 if self.connected:
16211 return
16213 self.group.networks[self.network_uuid] = self.netparams
16214 self.cfg.Update(self.group, feedback_fn)
16217 class LUNetworkDisconnect(LogicalUnit):
16218 """Disconnect a network from a nodegroup
16220 """
16221 HPATH = "network-disconnect"
16222 HTYPE = constants.HTYPE_NETWORK
16223 REQ_BGL = False
16225 def ExpandNames(self):
16226 self.network_name = self.op.network_name
16227 self.group_name = self.op.group_name
16229 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16230 self.network = self.cfg.GetNetwork(self.network_uuid)
16231 if self.network is None:
16232 raise errors.OpPrereqError("Network %s does not exist" %
16233 self.network_name, errors.ECODE_INVAL)
16235 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16236 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16237 if self.group is None:
16238 raise errors.OpPrereqError("Group %s does not exist" %
16239 self.group_name, errors.ECODE_INVAL)
16241 self.needed_locks = {
16242 locking.LEVEL_NODEGROUP: [self.group_uuid],
16243 }
16244 self.share_locks[locking.LEVEL_INSTANCE] = 1
16246 def DeclareLocks(self, level):
16247 if level == locking.LEVEL_INSTANCE:
16248 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16250 # Lock instances optimistically, needs verification once group lock has
16251 # been acquired
16252 if self.op.conflicts_check:
16253 self.needed_locks[locking.LEVEL_INSTANCE] = \
16254 self.cfg.GetNodeGroupInstances(self.group_uuid)
16256 def BuildHooksEnv(self):
16257 ret = {
16258 "GROUP_NAME": self.group_name,
16259 }
16260 ret.update(_BuildNetworkHookEnvByObject(self.network))
16261 return ret
16263 def BuildHooksNodes(self):
16264 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16265 return (nodes, nodes)
16267 def CheckPrereq(self):
16268 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16269 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16271 assert self.group_uuid in owned_groups
16273 # Check if locked instances are still correct
16274 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16276 l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
16277 for i in value)
16279 self.connected = True
16280 if self.network_uuid not in self.group.networks:
16281 self.LogWarning("Network '%s' is not mapped to group '%s'",
16282 self.network_name, self.group.name)
16283 self.connected = False
16284 return
16286 if self.op.conflicts_check:
16287 conflicting_instances = []
16289 for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
16290 for idx, nic in enumerate(instance.nics):
16291 if nic.network == self.network_name:
16292 conflicting_instances.append((instance.name, idx, nic.ip))
16294 if conflicting_instances:
16295 self.LogWarning("The following occurrences use IPs from network %s"
16296 " that is about to be disconnected from node group"
16297 " %s: %s" %
16298 (self.network_name, self.group.name,
16299 l(conflicting_instances)))
16300 raise errors.OpPrereqError("Conflicting IPs."
16301 " Please remove/modify"
16302 " corresponding NICs",
16303 errors.ECODE_INVAL)
16305 def Exec(self, feedback_fn):
16306 if not self.connected:
16307 return
16309 del self.group.networks[self.network_uuid]
16310 self.cfg.Update(self.group, feedback_fn)
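# Example (illustrative sketch): connecting a network to a node group and
# disconnecting it again; mode/link values are hypothetical:
#
#   connect_op = opcodes.OpNetworkConnect(group_name="rack1",
#                                         network_name="net1",
#                                         network_mode=constants.NIC_MODE_BRIDGED,
#                                         network_link="br0",
#                                         conflicts_check=True)
#   disconnect_op = opcodes.OpNetworkDisconnect(group_name="rack1",
#                                               network_name="net1",
#                                               conflicts_check=True)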
16313 #: Query type implementations
16314 _QUERY_IMPL = {
16315 constants.QR_CLUSTER: _ClusterQuery,
16316 constants.QR_INSTANCE: _InstanceQuery,
16317 constants.QR_NODE: _NodeQuery,
16318 constants.QR_GROUP: _GroupQuery,
16319 constants.QR_NETWORK: _NetworkQuery,
16320 constants.QR_OS: _OsQuery,
16321 constants.QR_EXPORT: _ExportQuery,
16322 }
16324 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16327 def _GetQueryImplementation(name):
16328 """Returns the implementation for a query type.
16330 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16333 try:
16334 return _QUERY_IMPL[name]
16335 except KeyError:
16336 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16337 errors.ECODE_INVAL)
16340 def _CheckForConflictingIp(lu, ip, node):
16341 """In case of a conflicting IP address, raise an error.
16343 @type ip: string
16344 @param ip: IP address
16345 @type node: string
16346 @param node: node name
16348 """
16349 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16350 if conf_net is not None:
16351 raise errors.OpPrereqError("Conflicting IP found:"
16352 " %s <> %s." % (ip, conf_net),
16353 errors.ECODE_INVAL)
16355 return (None, None)