# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# C0302: since we have waaaay too many lines in this module

# Standard library and third-party imports used further down in this module
import copy
import itertools
import logging

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti import vcluster
from ganeti import network
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611


INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs
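
# Usage sketch (illustrative only, not part of this module's code): an LU's
# Exec can hand follow-up work to the job queue by returning a
# ResultWithJobs; the opcode and keyword value below are hypothetical.
#
#   def Exec(self, feedback_fn):
#     ...                      # the LU's own work
#     return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]],
#                           some_extra_result=42)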


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner

    # Dictionaries used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)

    self.remove_locks = {}

    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}

    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
        waits), so it should only check the opcode-level parameters

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError
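
  # Usage sketch (hypothetical concurrent LU, for illustration): such an LU
  # would typically set REQ_BGL = False and override ExpandNames along these
  # lines, sharing node locks instead of holding them exclusively:
  #
  #   def ExpandNames(self):
  #     self.needed_locks = {
  #       locking.LEVEL_NODE: locking.ALL_SET,
  #       }
  #     self.share_locks[locking.LEVEL_NODE] = 1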

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result
        and hook results

    """
    # The API must be kept, thus we ignore the unused argument; this could
    # also be a plain function (hence the disabled warnings below)
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
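
  # Usage sketch (hypothetical LU, for illustration): an LU that locked
  # instances in ExpandNames and set self.recalculate_locks would typically
  # recalculate its node locks like this:
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes(primary_only=True)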


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase(object):
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  #: Field to sort by
  SORT_FIELD = "name"

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield=self.SORT_FIELD)
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @return: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    return locking.ALL_SET
  else:
    return names[:]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
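
# Example (illustrative values only): with use_default=True a value of
# constants.VALUE_DEFAULT deletes the key, so the cluster-level default
# applies again, while other keys are added or overwritten:
#
#   old = {"kernel_path": "/vmlinuz", "serial_console": True}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "root_path": "/dev/vda1"}
#   _GetUpdatedParams(old, upd)
#   => {"serial_console": True, "root_path": "/dev/vda1"}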


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      elif key in constants.IPOLICY_PARAMETERS:
        # FIXME: we assume all such values are float
        try:
          ipolicy[key] = float(value)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid value for attribute"
                                     " '%s': '%s', error: %s" %
                                     (key, value, err), errors.ECODE_INVAL)
      else:
        # FIXME: we assume all others are lists; this should be redone
        # in a nicer way
        ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @return: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
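
# Usage sketch (hypothetical LU context): once an LU has narrowed its work
# down to a single node, it can drop every other node lock early:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])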


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as
      value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s",
                  node_name, err)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")


def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an error message describing the violated criterion

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
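
# Example (illustrative ipolicy fragment): with a memory range of
# [128, 4096] MB, a requested value of 8192 fails the check and a
# human-readable message is returned (None would mean "within range"):
#
#   ipolicy = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#              constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096}}
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 8192)
#   => "<mem-size constant> value 8192 is not in range [128, 4096]"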


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
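
# Example (illustrative spec dict): the expected shape of instance_spec,
# checked against a hypothetical ipolicy:
#
#   spec = {constants.ISPEC_MEM_SIZE: 2048,
#           constants.ISPEC_CPU_COUNT: 2,
#           constants.ISPEC_DISK_COUNT: 1,
#           constants.ISPEC_DISK_SIZE: [10240],
#           constants.ISPEC_NIC_COUNT: 1,
#           constants.ISPEC_SPINDLE_USE: 1}
#   violations = _ComputeIPolicyInstanceSpecViolation(ipolicy, spec)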


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
      did not before

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
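
# Usage sketch (hypothetical config object and names): these wrappers turn a
# short name into the canonical one, or raise OpPrereqError if it is unknown:
#
#   name = _ExpandNodeName(lu.cfg, "node1")
#   # => "node1.example.com" on a cluster where that node exists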


def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         network_type, mac_prefix, tags):
  """Builds network related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the network
  @type subnet: string
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type network_type: string
  @param network_type: the type of the network
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @type tags: list
  @param tags: the tags of the network

  """
  env = {}
  if name:
    env["NETWORK_NAME"] = name
  if subnet:
    env["NETWORK_SUBNET"] = subnet
  if gateway:
    env["NETWORK_GATEWAY"] = gateway
  if network6:
    env["NETWORK_SUBNET6"] = network6
  if gateway6:
    env["NETWORK_GATEWAY6"] = gateway6
  if mac_prefix:
    env["NETWORK_MAC_PREFIX"] = mac_prefix
  if network_type:
    env["NETWORK_TYPE"] = network_type
  if tags:
    env["NETWORK_TAGS"] = " ".join(tags)

  return env


def _BuildNetworkHookEnvByObject(net):
  """Builds network related env variables for hooks.

  @type net: L{objects.Network}
  @param net: the network object

  """
  args = {
    "name": net.name,
    "subnet": net.network,
    "gateway": net.gateway,
    "network6": net.network6,
    "gateway6": net.gateway6,
    "network_type": net.network_type,
    "mac_prefix": net.mac_prefix,
    "tags": net.tags,
    }

  return _BuildNetworkHookEnv(**args) # pylint: disable=W0142


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link, network, netinfo)
      representing the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if net:
        env["INSTANCE_NIC%d_NETWORK" % idx] = net
      if netinfo:
        nobj = objects.Network.FromDict(netinfo)
        if nobj.network:
          env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
        if nobj.gateway:
          env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
        if nobj.network6:
          env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
        if nobj.gateway6:
          env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
        if nobj.mac_prefix:
          env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
        if nobj.network_type:
          env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
        if nobj.tags:
          env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
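
# Example (abridged, illustrative values): for a one-NIC, one-disk instance
# the resulting hook environment contains entries along these lines:
#
#   {"INSTANCE_NAME": "inst1.example.com",
#    "INSTANCE_PRIMARY": "node1.example.com",
#    "INSTANCE_NIC_COUNT": 1,
#    "INSTANCE_NIC0_MAC": "aa:00:00:35:dd:99",
#    "INSTANCE_DISK_COUNT": 1,
#    "INSTANCE_DISK0_SIZE": 10240,
#    ...}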


def _NICToTuple(lu, nic):
  """Build a tuple of NIC information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  """
  ip = nic.ip
  mac = nic.mac
  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  net = nic.network
  netinfo = None
  if net:
    net_uuid = lu.cfg.LookupNetwork(net)
    if net_uuid:
      nobj = lu.cfg.GetNetwork(net_uuid)
      netinfo = objects.Network.ToDict(nobj)
  return (ip, mac, mode, link, net, netinfo)


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  for nic in nics:
    hooks_nics.append(_NICToTuple(lu, nic))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: iterable of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
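
# Usage sketch (hypothetical LU whose opcode has "iallocator" and
# "target_node" slots): called from CheckArguments so that exactly one
# placement source remains filled in afterwards:
#
#   _CheckIAllocatorOrNode(self, "iallocator", "target_node")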


def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not ialloc:
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

  if not ialloc:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return ialloc


def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  """
  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
  return hostname
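
# Example (illustrative): passing "inst1" on a cluster whose resolver returns
# "inst1.example.com" succeeds (prefix match) and the full hostname object is
# returned; a resolver answer of "other.example.com" raises OpPrereqError.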


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
1901 def _VerifyCertificate(filename):
1902 """Verifies a certificate for L{LUClusterVerifyConfig}.
1904 @type filename: string
1905 @param filename: Path to PEM file
1909 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1910 utils.ReadFile(filename))
1911 except Exception, err: # pylint: disable=W0703
1912 return (LUClusterVerifyConfig.ETYPE_ERROR,
1913 "Failed to load X509 certificate %s: %s" % (filename, err))
1916 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1917 constants.SSL_CERT_EXPIRATION_ERROR)
1920 fnamemsg = "While verifying %s: %s" % (filename, msg)
1925 return (None, fnamemsg)
1926 elif errcode == utils.CERT_WARNING:
1927 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1928 elif errcode == utils.CERT_ERROR:
1929 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1931 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1934 def _GetAllHypervisorParameters(cluster, instances):
1935 """Compute the set of all hypervisor parameters.
1937 @type cluster: L{objects.Cluster}
1938 @param cluster: the cluster object
1939 @param instances: list of L{objects.Instance}
1940 @param instances: additional instances from which to obtain parameters
1941 @rtype: list of (origin, hypervisor, parameters)
1942 @return: a list with all parameters found, indicating the hypervisor they
1943 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1948 for hv_name in cluster.enabled_hypervisors:
1949 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1951 for os_name, os_hvp in cluster.os_hvp.items():
1952 for hv_name, hv_params in os_hvp.items():
1954 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1955 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1957 # TODO: collapse identical parameter values in a single one
1958 for instance in instances:
1959 if instance.hvparams:
1960 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1961 cluster.FillHV(instance)))
1963 return hvp_data
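# Illustrative sketch (not part of Ganeti): the shape of the list built by
# _GetAllHypervisorParameters, using plain dicts instead of cluster objects.
# All names and parameter values below are invented for the example.
def _ExampleHvpData():
  cluster_defaults = {"kvm": {"acpi": True}}
  os_hvp = {"debian": {"kvm": {"acpi": False}}}
  instance_hvp = {"inst1.example.com": ("kvm", {"boot_order": "disk"})}

  hvp_data = []
  for hv_name, params in cluster_defaults.items():
    hvp_data.append(("cluster", hv_name, params))
  for os_name, by_hv in os_hvp.items():
    for hv_name, params in by_hv.items():
      hvp_data.append(("os %s" % os_name, hv_name, params))
  for inst_name, (hv_name, params) in instance_hvp.items():
    hvp_data.append(("instance %s" % inst_name, hv_name, params))
  # Result: a list of (origin, hypervisor, parameters) triples
  return hvp_data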
1966 class _VerifyErrors(object):
1967 """Mix-in for cluster/group verify LUs.
1969 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1970 self.op and self._feedback_fn to be available.)
1974 ETYPE_FIELD = "code"
1975 ETYPE_ERROR = "ERROR"
1976 ETYPE_WARNING = "WARNING"
1978 def _Error(self, ecode, item, msg, *args, **kwargs):
1979 """Format an error message.
1981 Based on the opcode's error_codes parameter, either format a
1982 parseable error code, or a simpler error string.
1984 This must be called only from Exec and functions called from Exec.
1987 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1988 itype, etxt, _ = ecode
1989 # first complete the msg
1990 if args:
1991 msg = msg % args
1992 # then format the whole message
1993 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1994 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1995 else:
1996 if item is not None:
1997 item = " " + str(item)
1998 else:
1999 item = ""
2000 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2001 # and finally report it via the feedback_fn
2002 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2004 def _ErrorIf(self, cond, ecode, *args, **kwargs):
2005 """Log an error message if the passed condition is True.
2008 cond = (bool(cond)
2009 or self.op.debug_simulate_errors) # pylint: disable=E1101
2011 # If the error code is in the list of ignored errors, demote the error to a
2013 (_, etxt, _) = ecode
2014 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2015 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
2017 if cond:
2018 self._Error(ecode, *args, **kwargs)
2020 # do not mark the operation as failed for WARN cases only
2021 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
2022 self.bad = self.bad or cond
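# Illustrative sketch (not part of Ganeti): the two message layouts produced
# by _Error above, shown on hardcoded sample values.
def _ExampleErrorFormats():
  (ltype, etxt, itype, item, msg) = \
    ("ERROR", "ECLUSTERCFG", "cluster", None, "sample problem")
  # With the opcode's error_codes flag: a colon-separated, parseable line
  parseable = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
  # Without it: a human-readable line, as fed to feedback_fn
  if item is not None:
    item = " " + str(item)
  else:
    item = ""
  human = "%s: %s%s: %s" % (ltype, itype, item, msg)
  return (parseable, human)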
2025 class LUClusterVerify(NoHooksLU):
2026 """Submits all jobs necessary to verify the cluster.
2031 def ExpandNames(self):
2032 self.needed_locks = {}
2034 def Exec(self, feedback_fn):
2035 jobs = []
2037 if self.op.group_name:
2038 groups = [self.op.group_name]
2039 depends_fn = lambda: None
2040 else:
2041 groups = self.cfg.GetNodeGroupList()
2043 # Verify global configuration
2044 jobs.append([
2045 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2046 ])
2048 # Always depend on global verification
2049 depends_fn = lambda: [(-len(jobs), [])]
2051 jobs.extend(
2052 [opcodes.OpClusterVerifyGroup(group_name=group,
2053 ignore_errors=self.op.ignore_errors,
2054 depends=depends_fn())]
2055 for group in groups)
2057 # Fix up all parameters
2058 for op in itertools.chain(*jobs): # pylint: disable=W0142
2059 op.debug_simulate_errors = self.op.debug_simulate_errors
2060 op.verbose = self.op.verbose
2061 op.error_codes = self.op.error_codes
2063 op.skip_checks = self.op.skip_checks
2064 except AttributeError:
2065 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2067 return ResultWithJobs(jobs)
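# Illustrative sketch (not part of Ganeti): the job list built by Exec above
# for a whole-cluster verification of two groups. Each inner list is one job;
# the relative dependency (-len(jobs), []) means "wait for the job submitted
# len(jobs) positions earlier", i.e. the config-verification job. Opcodes are
# stand-in strings/tuples here, not real opcode objects.
def _ExampleVerifyJobLayout():
  jobs = [["OpClusterVerifyConfig"]]
  depends = [(-len(jobs), [])]
  for group in ("group1", "group2"):
    jobs.append([("OpClusterVerifyGroup", group, depends)])
  return jobs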
2070 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2071 """Verifies the cluster config.
2076 def _VerifyHVP(self, hvp_data):
2077 """Verifies locally the syntax of the hypervisor parameters.
2080 for item, hv_name, hv_params in hvp_data:
2081 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2084 hv_class = hypervisor.GetHypervisor(hv_name)
2085 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2086 hv_class.CheckParameterSyntax(hv_params)
2087 except errors.GenericError, err:
2088 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2090 def ExpandNames(self):
2091 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2092 self.share_locks = _ShareAll()
2094 def CheckPrereq(self):
2095 """Check prerequisites.
2098 # Retrieve all information
2099 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2100 self.all_node_info = self.cfg.GetAllNodesInfo()
2101 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2103 def Exec(self, feedback_fn):
2104 """Verify integrity of cluster, performing various test on nodes.
2108 self._feedback_fn = feedback_fn
2110 feedback_fn("* Verifying cluster config")
2112 for msg in self.cfg.VerifyConfig():
2113 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2115 feedback_fn("* Verifying cluster certificate files")
2117 for cert_filename in pathutils.ALL_CERT_FILES:
2118 (errcode, msg) = _VerifyCertificate(cert_filename)
2119 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2121 feedback_fn("* Verifying hypervisor parameters")
2123 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2124 self.all_inst_info.values()))
2126 feedback_fn("* Verifying all nodes belong to an existing group")
2128 # We do this verification here because, should this bogus circumstance
2129 # occur, it would never be caught by VerifyGroup, which only acts on
2130 # nodes/instances reachable from existing node groups.
2132 dangling_nodes = set(node.name for node in self.all_node_info.values()
2133 if node.group not in self.all_group_info)
2135 dangling_instances = {}
2136 no_node_instances = []
2138 for inst in self.all_inst_info.values():
2139 if inst.primary_node in dangling_nodes:
2140 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2141 elif inst.primary_node not in self.all_node_info:
2142 no_node_instances.append(inst.name)
2144 pretty_dangling = [
2145 "%s (%s)" %
2146 (node.name,
2147 utils.CommaJoin(dangling_instances.get(node.name,
2148 ["no instances"])))
2149 for node in dangling_nodes]
2151 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2153 "the following nodes (and their instances) belong to a non"
2154 " existing group: %s", utils.CommaJoin(pretty_dangling))
2156 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2158 "the following instances have a non-existing primary-node:"
2159 " %s", utils.CommaJoin(no_node_instances))
2164 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2165 """Verifies the status of a node group.
2168 HPATH = "cluster-verify"
2169 HTYPE = constants.HTYPE_CLUSTER
2172 _HOOKS_INDENT_RE = re.compile("^", re.M)
2174 class NodeImage(object):
2175 """A class representing the logical and physical status of a node.
2178 @ivar name: the node name to which this object refers
2179 @ivar volumes: a structure as returned from
2180 L{ganeti.backend.GetVolumeList} (runtime)
2181 @ivar instances: a list of running instances (runtime)
2182 @ivar pinst: list of configured primary instances (config)
2183 @ivar sinst: list of configured secondary instances (config)
2184 @ivar sbp: dictionary of {primary-node: list of instances} for all
2185 instances for which this node is secondary (config)
2186 @ivar mfree: free memory, as reported by hypervisor (runtime)
2187 @ivar dfree: free disk, as reported by the node (runtime)
2188 @ivar offline: the offline status (config)
2189 @type rpc_fail: boolean
2190 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2191 not whether the individual keys were correct) (runtime)
2192 @type lvm_fail: boolean
2193 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2194 @type hyp_fail: boolean
2195 @ivar hyp_fail: whether the RPC call didn't return the instance list
2196 @type ghost: boolean
2197 @ivar ghost: whether this is a known node or not (config)
2198 @type os_fail: boolean
2199 @ivar os_fail: whether the RPC call didn't return valid OS data
2201 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2202 @type vm_capable: boolean
2203 @ivar vm_capable: whether the node can host instances
2206 def __init__(self, offline=False, name=None, vm_capable=True):
2215 self.offline = offline
2216 self.vm_capable = vm_capable
2217 self.rpc_fail = False
2218 self.lvm_fail = False
2219 self.hyp_fail = False
2221 self.os_fail = False
2224 def ExpandNames(self):
2225 # This raises errors.OpPrereqError on its own:
2226 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2228 # Get instances in node group; this is unsafe and needs verification later
2230 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2232 self.needed_locks = {
2233 locking.LEVEL_INSTANCE: inst_names,
2234 locking.LEVEL_NODEGROUP: [self.group_uuid],
2235 locking.LEVEL_NODE: [],
2237 # This opcode is run by watcher every five minutes and acquires all nodes
2238 # for a group. It doesn't run for a long time, so it's better to acquire
2239 # the node allocation lock as well.
2240 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2243 self.share_locks = _ShareAll()
2245 def DeclareLocks(self, level):
2246 if level == locking.LEVEL_NODE:
2247 # Get members of node group; this is unsafe and needs verification later
2248 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2250 all_inst_info = self.cfg.GetAllInstancesInfo()
2252 # In Exec(), we warn about mirrored instances that have primary and
2253 # secondary living in separate node groups. To fully verify that
2254 # volumes for these instances are healthy, we will need to do an
2255 # extra call to their secondaries. We ensure here those nodes will
2257 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2258 # Important: access only the instances whose lock is owned
2259 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2260 nodes.update(all_inst_info[inst].secondary_nodes)
2262 self.needed_locks[locking.LEVEL_NODE] = nodes
2264 def CheckPrereq(self):
2265 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2266 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2268 group_nodes = set(self.group_info.members)
2270 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2273 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2275 unlocked_instances = \
2276 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2278 if unlocked_nodes:
2279 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2280 utils.CommaJoin(unlocked_nodes),
2281 errors.ECODE_STATE)
2283 if unlocked_instances:
2284 raise errors.OpPrereqError("Missing lock for instances: %s" %
2285 utils.CommaJoin(unlocked_instances),
2286 errors.ECODE_STATE)
2288 self.all_node_info = self.cfg.GetAllNodesInfo()
2289 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2291 self.my_node_names = utils.NiceSort(group_nodes)
2292 self.my_inst_names = utils.NiceSort(group_instances)
2294 self.my_node_info = dict((name, self.all_node_info[name])
2295 for name in self.my_node_names)
2297 self.my_inst_info = dict((name, self.all_inst_info[name])
2298 for name in self.my_inst_names)
2300 # We detect here the nodes that will need the extra RPC calls for verifying
2301 # split LV volumes; they should be locked.
2302 extra_lv_nodes = set()
2304 for inst in self.my_inst_info.values():
2305 if inst.disk_template in constants.DTS_INT_MIRROR:
2306 for nname in inst.all_nodes:
2307 if self.all_node_info[nname].group != self.group_uuid:
2308 extra_lv_nodes.add(nname)
2310 unlocked_lv_nodes = \
2311 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2313 if unlocked_lv_nodes:
2314 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2315 utils.CommaJoin(unlocked_lv_nodes),
2317 self.extra_lv_nodes = list(extra_lv_nodes)
2319 def _VerifyNode(self, ninfo, nresult):
2320 """Perform some basic validation on data returned from a node.
2322 - check the result data structure is well formed and has all the
2323 mandatory fields
2324 - check ganeti version
2326 @type ninfo: L{objects.Node}
2327 @param ninfo: the node to check
2328 @param nresult: the results from the node
2330 @return: whether overall this call was successful (and we can expect
2331 reasonable values in the response)
2335 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2337 # main result, nresult should be a non-empty dict
2338 test = not nresult or not isinstance(nresult, dict)
2339 _ErrorIf(test, constants.CV_ENODERPC, node,
2340 "unable to verify node: no data returned")
2344 # compares ganeti version
2345 local_version = constants.PROTOCOL_VERSION
2346 remote_version = nresult.get("version", None)
2347 test = not (remote_version and
2348 isinstance(remote_version, (list, tuple)) and
2349 len(remote_version) == 2)
2350 _ErrorIf(test, constants.CV_ENODERPC, node,
2351 "connection to node returned invalid data")
2355 test = local_version != remote_version[0]
2356 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2357 "incompatible protocol versions: master %s,"
2358 " node %s", local_version, remote_version[0])
2362 # node seems compatible, we can actually try to look into its results
2364 # full package version
2365 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2366 constants.CV_ENODEVERSION, node,
2367 "software version mismatch: master %s, node %s",
2368 constants.RELEASE_VERSION, remote_version[1],
2369 code=self.ETYPE_WARNING)
2371 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2372 if ninfo.vm_capable and isinstance(hyp_result, dict):
2373 for hv_name, hv_result in hyp_result.iteritems():
2374 test = hv_result is not None
2375 _ErrorIf(test, constants.CV_ENODEHV, node,
2376 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2378 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2379 if ninfo.vm_capable and isinstance(hvp_result, list):
2380 for item, hv_name, hv_result in hvp_result:
2381 _ErrorIf(True, constants.CV_ENODEHV, node,
2382 "hypervisor %s parameter verify failure (source %s): %s",
2383 hv_name, item, hv_result)
2385 test = nresult.get(constants.NV_NODESETUP,
2386 ["Missing NODESETUP results"])
2387 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2388 "; ".join(test))
2390 return True
2392 def _VerifyNodeTime(self, ninfo, nresult,
2393 nvinfo_starttime, nvinfo_endtime):
2394 """Check the node time.
2396 @type ninfo: L{objects.Node}
2397 @param ninfo: the node to check
2398 @param nresult: the remote results for the node
2399 @param nvinfo_starttime: the start time of the RPC call
2400 @param nvinfo_endtime: the end time of the RPC call
2404 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2406 ntime = nresult.get(constants.NV_TIME, None)
2407 try:
2408 ntime_merged = utils.MergeTime(ntime)
2409 except (ValueError, TypeError):
2410 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2411 return
2413 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2414 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2415 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2416 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2417 else:
2418 ntime_diff = None
2420 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2421 "Node time diverges by at least %s from master node time",
2424 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2425 """Check the node LVM results.
2427 @type ninfo: L{objects.Node}
2428 @param ninfo: the node to check
2429 @param nresult: the remote results for the node
2430 @param vg_name: the configured VG name
2437 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2439 # checks vg existence and size > 20G
2440 vglist = nresult.get(constants.NV_VGLIST, None)
2441 test = not vglist
2442 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2443 if not test:
2444 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2445 constants.MIN_VG_SIZE)
2446 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2449 pvlist = nresult.get(constants.NV_PVLIST, None)
2450 test = pvlist is None
2451 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2453 # check that ':' is not present in PV names, since it's a
2454 # special character for lvcreate (denotes the range of PEs to
2456 for _, pvname, owner_vg in pvlist:
2457 test = ":" in pvname
2458 _ErrorIf(test, constants.CV_ENODELVM, node,
2459 "Invalid character ':' in PV '%s' of VG '%s'",
2462 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2463 """Check the node bridges.
2465 @type ninfo: L{objects.Node}
2466 @param ninfo: the node to check
2467 @param nresult: the remote results for the node
2468 @param bridges: the expected list of bridges
2475 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2477 missing = nresult.get(constants.NV_BRIDGES, None)
2478 test = not isinstance(missing, list)
2479 _ErrorIf(test, constants.CV_ENODENET, node,
2480 "did not return valid bridge information")
2482 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2483 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2485 def _VerifyNodeUserScripts(self, ninfo, nresult):
2486 """Check the results of user scripts presence and executability on the node
2488 @type ninfo: L{objects.Node}
2489 @param ninfo: the node to check
2490 @param nresult: the remote results for the node
2495 test = not constants.NV_USERSCRIPTS in nresult
2496 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2497 "did not return user scripts information")
2499 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2501 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2502 "user scripts not present or not executable: %s" %
2503 utils.CommaJoin(sorted(broken_scripts)))
2505 def _VerifyNodeNetwork(self, ninfo, nresult):
2506 """Check the node network connectivity results.
2508 @type ninfo: L{objects.Node}
2509 @param ninfo: the node to check
2510 @param nresult: the remote results for the node
2514 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2516 test = constants.NV_NODELIST not in nresult
2517 _ErrorIf(test, constants.CV_ENODESSH, node,
2518 "node hasn't returned node ssh connectivity data")
2520 if nresult[constants.NV_NODELIST]:
2521 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2522 _ErrorIf(True, constants.CV_ENODESSH, node,
2523 "ssh communication with node '%s': %s", a_node, a_msg)
2525 test = constants.NV_NODENETTEST not in nresult
2526 _ErrorIf(test, constants.CV_ENODENET, node,
2527 "node hasn't returned node tcp connectivity data")
2529 if nresult[constants.NV_NODENETTEST]:
2530 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2532 _ErrorIf(True, constants.CV_ENODENET, node,
2533 "tcp communication with node '%s': %s",
2534 anode, nresult[constants.NV_NODENETTEST][anode])
2536 test = constants.NV_MASTERIP not in nresult
2537 _ErrorIf(test, constants.CV_ENODENET, node,
2538 "node hasn't returned node master IP reachability data")
2540 if not nresult[constants.NV_MASTERIP]:
2541 if node == self.master_node:
2542 msg = "the master node cannot reach the master IP (not configured?)"
2544 msg = "cannot reach the master IP"
2545 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2547 def _VerifyInstance(self, instance, instanceconfig, node_image,
2549 """Verify an instance.
2551 This function checks to see if the required block devices are
2552 available on the instance's node.
2555 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2556 node_current = instanceconfig.primary_node
2558 node_vol_should = {}
2559 instanceconfig.MapLVsByNode(node_vol_should)
2561 cluster = self.cfg.GetClusterInfo()
2562 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2564 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2565 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2566 code=self.ETYPE_WARNING)
2568 for node in node_vol_should:
2569 n_img = node_image[node]
2570 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2571 # ignore missing volumes on offline or broken nodes
2572 continue
2573 for volume in node_vol_should[node]:
2574 test = volume not in n_img.volumes
2575 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2576 "volume %s missing on node %s", volume, node)
2578 if instanceconfig.admin_state == constants.ADMINST_UP:
2579 pri_img = node_image[node_current]
2580 test = instance not in pri_img.instances and not pri_img.offline
2581 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2582 "instance not running on its primary node %s",
2585 diskdata = [(nname, success, status, idx)
2586 for (nname, disks) in diskstatus.items()
2587 for idx, (success, status) in enumerate(disks)]
2589 for nname, success, bdev_status, idx in diskdata:
2590 # the 'ghost node' construction in Exec() ensures that we have a
2592 snode = node_image[nname]
2593 bad_snode = snode.ghost or snode.offline
2594 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2595 not success and not bad_snode,
2596 constants.CV_EINSTANCEFAULTYDISK, instance,
2597 "couldn't retrieve status for disk/%s on %s: %s",
2598 idx, nname, bdev_status)
2599 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2600 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2601 constants.CV_EINSTANCEFAULTYDISK, instance,
2602 "disk/%s on %s is faulty", idx, nname)
2604 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2605 """Verify if there are any unknown volumes in the cluster.
2607 The .os, .swap and backup volumes are ignored. All other volumes are
2608 reported as unknown.
2610 @type reserved: L{ganeti.utils.FieldSet}
2611 @param reserved: a FieldSet of reserved volume names
2614 for node, n_img in node_image.items():
2615 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2616 self.all_node_info[node].group != self.group_uuid):
2617 # skip non-healthy nodes
2618 continue
2619 for volume in n_img.volumes:
2620 test = ((node not in node_vol_should or
2621 volume not in node_vol_should[node]) and
2622 not reserved.Matches(volume))
2623 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2624 "volume %s is unknown", volume)
2626 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2627 """Verify N+1 Memory Resilience.
2629 Check that if one single node dies we can still start all the
2630 instances it was primary for.
2633 cluster_info = self.cfg.GetClusterInfo()
2634 for node, n_img in node_image.items():
2635 # This code checks that every node which is now listed as
2636 # secondary has enough memory to host all instances it is
2637 # supposed to should a single other node in the cluster fail.
2638 # FIXME: not ready for failover to an arbitrary node
2639 # FIXME: does not support file-backed instances
2640 # WARNING: we currently take into account down instances as well
2641 # as up ones, considering that even if they're down someone
2642 # might want to start them even in the event of a node failure.
2643 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2644 # we're skipping nodes marked offline and nodes in other groups from
2645 # the N+1 warning, since most likely we don't have good memory
2646 # information from them; we already list instances living on such
2647 # nodes, and that's enough warning
2648 continue
2649 #TODO(dynmem): also consider ballooning out other instances
2650 for prinode, instances in n_img.sbp.items():
2651 needed_mem = 0
2652 for instance in instances:
2653 bep = cluster_info.FillBE(instance_cfg[instance])
2654 if bep[constants.BE_AUTO_BALANCE]:
2655 needed_mem += bep[constants.BE_MINMEM]
2656 test = n_img.mfree < needed_mem
2657 self._ErrorIf(test, constants.CV_ENODEN1, node,
2658 "not enough memory to accomodate instance failovers"
2659 " should node %s fail (%dMiB needed, %dMiB available)",
2660 prinode, needed_mem, n_img.mfree)
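# Illustrative sketch (not part of Ganeti): the N+1 memory check above on toy
# data. For each primary node we sum the minimum memory of the auto-balanced
# instances that would fail over to this node and compare it with the node's
# free memory. All numbers are invented (MiB).
def _ExampleNPlusOneCheck():
  sbp = {"node1.example.com": ["inst1", "inst2"]}  # primary -> its instances
  min_mem = {"inst1": 512, "inst2": 1024}          # per-instance BE_MINMEM
  mfree = 1024                                     # free memory on this node
  failures = []
  for prinode, instances in sbp.items():
    needed_mem = sum(min_mem[inst] for inst in instances)
    if mfree < needed_mem:
      failures.append((prinode, needed_mem, mfree))
  return failures  # [("node1.example.com", 1536, 1024)]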
2662 @classmethod
2663 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2664 (files_all, files_opt, files_mc, files_vm)):
2665 """Verifies file checksums collected from all nodes.
2667 @param errorif: Callback for reporting errors
2668 @param nodeinfo: List of L{objects.Node} objects
2669 @param master_node: Name of master node
2670 @param all_nvinfo: RPC results
2673 # Define functions determining which nodes to consider for a file
2676 (files_mc, lambda node: (node.master_candidate or
2677 node.name == master_node)),
2678 (files_vm, lambda node: node.vm_capable),
2681 # Build mapping from filename to list of nodes which should have the file
2683 for (files, fn) in files2nodefn:
2685 filenodes = nodeinfo
2687 filenodes = filter(fn, nodeinfo)
2688 nodefiles.update((filename,
2689 frozenset(map(operator.attrgetter("name"), filenodes)))
2690 for filename in files)
2692 assert set(nodefiles) == (files_all | files_mc | files_vm)
2694 fileinfo = dict((filename, {}) for filename in nodefiles)
2695 ignore_nodes = set()
2697 for node in nodeinfo:
2699 ignore_nodes.add(node.name)
2702 nresult = all_nvinfo[node.name]
2704 if nresult.fail_msg or not nresult.payload:
2707 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2708 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2709 for (key, value) in fingerprints.items())
2712 test = not (node_files and isinstance(node_files, dict))
2713 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2714 "Node did not return file checksum data")
2716 ignore_nodes.add(node.name)
2719 # Build per-checksum mapping from filename to nodes having it
2720 for (filename, checksum) in node_files.items():
2721 assert filename in nodefiles
2722 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2724 for (filename, checksums) in fileinfo.items():
2725 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2727 # Nodes having the file
2728 with_file = frozenset(node_name
2729 for nodes in fileinfo[filename].values()
2730 for node_name in nodes) - ignore_nodes
2732 expected_nodes = nodefiles[filename] - ignore_nodes
2734 # Nodes missing file
2735 missing_file = expected_nodes - with_file
2737 if filename in files_opt:
2739 errorif(missing_file and missing_file != expected_nodes,
2740 constants.CV_ECLUSTERFILECHECK, None,
2741 "File %s is optional, but it must exist on all or no"
2742 " nodes (not found on %s)",
2743 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2745 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2746 "File %s is missing from node(s) %s", filename,
2747 utils.CommaJoin(utils.NiceSort(missing_file)))
2749 # Warn if a node has a file it shouldn't
2750 unexpected = with_file - expected_nodes
2752 constants.CV_ECLUSTERFILECHECK, None,
2753 "File %s should not exist on node(s) %s",
2754 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2756 # See if there are multiple versions of the file
2757 test = len(checksums) > 1
2758 if test:
2759 variants = ["variant %s on %s" %
2760 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2761 for (idx, (checksum, nodes)) in
2762 enumerate(sorted(checksums.items()))]
2763 else:
2764 variants = []
2766 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2767 "File %s found with %s different checksums (%s)",
2768 filename, len(checksums), "; ".join(variants))
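# Illustrative sketch (not part of Ganeti): the per-file bookkeeping used by
# _VerifyFiles. fileinfo maps filename -> {checksum -> set(nodes)}; more than
# one checksum per file means the nodes disagree about its contents. The
# sample data is invented.
def _ExampleFileVariants():
  fileinfo = {
    "/etc/example.conf": {
      "0123456789abcdef": set(["node1"]),
      "fedcba9876543210": set(["node2", "node3"]),
      },
    }
  issues = []
  for (filename, checksums) in fileinfo.items():
    if len(checksums) > 1:
      issues.append((filename, len(checksums)))
  return issues  # [("/etc/example.conf", 2)]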
2770 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2772 """Verifies and the node DRBD status.
2774 @type ninfo: L{objects.Node}
2775 @param ninfo: the node to check
2776 @param nresult: the remote results for the node
2777 @param instanceinfo: the dict of instances
2778 @param drbd_helper: the configured DRBD usermode helper
2779 @param drbd_map: the DRBD map as returned by
2780 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2784 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2787 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2788 test = (helper_result is None)
2789 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2790 "no drbd usermode helper returned")
2792 status, payload = helper_result
2793 test = not status
2794 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2795 "drbd usermode helper check unsuccessful: %s", payload)
2796 test = status and (payload != drbd_helper)
2797 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2798 "wrong drbd usermode helper: %s", payload)
2800 # compute the DRBD minors
2802 for minor, instance in drbd_map[node].items():
2803 test = instance not in instanceinfo
2804 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2805 "ghost instance '%s' in temporary DRBD map", instance)
2806 # ghost instance should not be running, but otherwise we
2807 # don't give double warnings (both ghost instance and
2808 # unallocated minor in use)
2810 node_drbd[minor] = (instance, False)
2812 instance = instanceinfo[instance]
2813 node_drbd[minor] = (instance.name,
2814 instance.admin_state == constants.ADMINST_UP)
2816 # and now check them
2817 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2818 test = not isinstance(used_minors, (tuple, list))
2819 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2820 "cannot parse drbd status file: %s", str(used_minors))
2821 if test:
2822 # we cannot check drbd status
2823 return
2825 for minor, (iname, must_exist) in node_drbd.items():
2826 test = minor not in used_minors and must_exist
2827 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2828 "drbd minor %d of instance %s is not active", minor, iname)
2829 for minor in used_minors:
2830 test = minor not in node_drbd
2831 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2832 "unallocated drbd minor %d is in use", minor)
2834 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2835 """Builds the node OS structures.
2837 @type ninfo: L{objects.Node}
2838 @param ninfo: the node to check
2839 @param nresult: the remote results for the node
2840 @param nimg: the node image object
2844 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2846 remote_os = nresult.get(constants.NV_OSLIST, None)
2847 test = (not isinstance(remote_os, list) or
2848 not compat.all(isinstance(v, list) and len(v) == 7
2849 for v in remote_os))
2851 _ErrorIf(test, constants.CV_ENODEOS, node,
2852 "node hasn't returned valid OS data")
2861 for (name, os_path, status, diagnose,
2862 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2864 if name not in os_dict:
2867 # parameters is a list of lists instead of list of tuples due to
2868 # JSON lacking a real tuple type, fix it:
2869 parameters = [tuple(v) for v in parameters]
2870 os_dict[name].append((os_path, status, diagnose,
2871 set(variants), set(parameters), set(api_ver)))
2873 nimg.oslist = os_dict
2875 def _VerifyNodeOS(self, ninfo, nimg, base):
2876 """Verifies the node OS list.
2878 @type ninfo: L{objects.Node}
2879 @param ninfo: the node to check
2880 @param nimg: the node image object
2881 @param base: the 'template' node we match against (e.g. from the master)
2885 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2887 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2889 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2890 for os_name, os_data in nimg.oslist.items():
2891 assert os_data, "Empty OS status for OS %s?!" % os_name
2892 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2893 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2894 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2895 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2896 "OS '%s' has multiple entries (first one shadows the rest): %s",
2897 os_name, utils.CommaJoin([v[0] for v in os_data]))
2898 # comparisons with the 'base' image
2899 test = os_name not in base.oslist
2900 _ErrorIf(test, constants.CV_ENODEOS, node,
2901 "Extra OS %s not present on reference node (%s)",
2905 assert base.oslist[os_name], "Base node has empty OS status?"
2906 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2907 if not b_status:
2908 # base OS is invalid, skipping
2909 continue
2910 for kind, a, b in [("API version", f_api, b_api),
2911 ("variants list", f_var, b_var),
2912 ("parameters", beautify_params(f_param),
2913 beautify_params(b_param))]:
2914 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2915 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2916 kind, os_name, base.name,
2917 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2919 # check any missing OSes
2920 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2921 _ErrorIf(missing, constants.CV_ENODEOS, node,
2922 "OSes present on reference node %s but missing on this node: %s",
2923 base.name, utils.CommaJoin(missing))
2925 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2926 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2928 @type ninfo: L{objects.Node}
2929 @param ninfo: the node to check
2930 @param nresult: the remote results for the node
2931 @type is_master: bool
2932 @param is_master: Whether node is the master node
2938 (constants.ENABLE_FILE_STORAGE or
2939 constants.ENABLE_SHARED_FILE_STORAGE)):
2941 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2943 # This should never happen
2944 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2945 "Node did not return forbidden file storage paths")
2947 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2948 "Found forbidden file storage paths: %s",
2949 utils.CommaJoin(fspaths))
2951 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2952 constants.CV_ENODEFILESTORAGEPATHS, node,
2953 "Node should not have returned forbidden file storage"
2956 def _VerifyOob(self, ninfo, nresult):
2957 """Verifies out of band functionality of a node.
2959 @type ninfo: L{objects.Node}
2960 @param ninfo: the node to check
2961 @param nresult: the remote results for the node
2965 # We just have to verify the paths on master and/or master candidates
2966 # as the oob helper is invoked on the master
2967 if ((ninfo.master_candidate or ninfo.master_capable) and
2968 constants.NV_OOB_PATHS in nresult):
2969 for path_result in nresult[constants.NV_OOB_PATHS]:
2970 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2972 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2973 """Verifies and updates the node volume data.
2975 This function will update a L{NodeImage}'s internal structures
2976 with data from the remote call.
2978 @type ninfo: L{objects.Node}
2979 @param ninfo: the node to check
2980 @param nresult: the remote results for the node
2981 @param nimg: the node image object
2982 @param vg_name: the configured VG name
2986 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2988 nimg.lvm_fail = True
2989 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2992 elif isinstance(lvdata, basestring):
2993 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2994 utils.SafeEncode(lvdata))
2995 elif not isinstance(lvdata, dict):
2996 _ErrorIf(True, constants.CV_ENODELVM, node,
2997 "rpc call to node failed (lvlist)")
2999 nimg.volumes = lvdata
3000 nimg.lvm_fail = False
3002 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3003 """Verifies and updates the node instance list.
3005 If the listing was successful, then updates this node's instance
3006 list. Otherwise, it marks the RPC call as failed for the instance
3009 @type ninfo: L{objects.Node}
3010 @param ninfo: the node to check
3011 @param nresult: the remote results for the node
3012 @param nimg: the node image object
3015 idata = nresult.get(constants.NV_INSTANCELIST, None)
3016 test = not isinstance(idata, list)
3017 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3018 "rpc call to node failed (instancelist): %s",
3019 utils.SafeEncode(str(idata)))
3020 if test:
3021 nimg.hyp_fail = True
3022 else:
3023 nimg.instances = idata
3025 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3026 """Verifies and computes a node information map
3028 @type ninfo: L{objects.Node}
3029 @param ninfo: the node to check
3030 @param nresult: the remote results for the node
3031 @param nimg: the node image object
3032 @param vg_name: the configured VG name
3036 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3038 # try to read free memory (from the hypervisor)
3039 hv_info = nresult.get(constants.NV_HVINFO, None)
3040 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3041 _ErrorIf(test, constants.CV_ENODEHV, node,
3042 "rpc call to node failed (hvinfo)")
3045 nimg.mfree = int(hv_info["memory_free"])
3046 except (ValueError, TypeError):
3047 _ErrorIf(True, constants.CV_ENODERPC, node,
3048 "node returned invalid nodeinfo, check hypervisor")
3050 # FIXME: devise a free space model for file based instances as well
3051 if vg_name is not None:
3052 test = (constants.NV_VGLIST not in nresult or
3053 vg_name not in nresult[constants.NV_VGLIST])
3054 _ErrorIf(test, constants.CV_ENODELVM, node,
3055 "node didn't return data for the volume group '%s'"
3056 " - it is either missing or broken", vg_name)
3059 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3060 except (ValueError, TypeError):
3061 _ErrorIf(True, constants.CV_ENODERPC, node,
3062 "node returned invalid LVM info, check LVM status")
3064 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3065 """Gets per-disk status information for all instances.
3067 @type nodelist: list of strings
3068 @param nodelist: Node names
3069 @type node_image: dict of (name, L{objects.Node})
3070 @param node_image: Node objects
3071 @type instanceinfo: dict of (name, L{objects.Instance})
3072 @param instanceinfo: Instance objects
3073 @rtype: {instance: {node: [(success, payload)]}}
3074 @return: a dictionary of per-instance dictionaries with nodes as
3075 keys and disk information as values; the disk information is a
3076 list of tuples (success, payload)
3079 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3082 node_disks_devonly = {}
3083 diskless_instances = set()
3084 diskless = constants.DT_DISKLESS
3086 for nname in nodelist:
3087 node_instances = list(itertools.chain(node_image[nname].pinst,
3088 node_image[nname].sinst))
3089 diskless_instances.update(inst for inst in node_instances
3090 if instanceinfo[inst].disk_template == diskless)
3091 disks = [(inst, disk)
3092 for inst in node_instances
3093 for disk in instanceinfo[inst].disks]
3096 # No need to collect data
3099 node_disks[nname] = disks
3101 # _AnnotateDiskParams already makes copies of the disks
3102 devonly = []
3103 for (inst, dev) in disks:
3104 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3105 self.cfg.SetDiskID(anno_disk, nname)
3106 devonly.append(anno_disk)
3108 node_disks_devonly[nname] = devonly
3110 assert len(node_disks) == len(node_disks_devonly)
3112 # Collect data from all nodes with disks
3113 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3116 assert len(result) == len(node_disks)
3120 for (nname, nres) in result.items():
3121 disks = node_disks[nname]
3123 if nres.offline:
3124 # No data from this node
3125 data = len(disks) * [(False, "node offline")]
3126 else:
3127 msg = nres.fail_msg
3128 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3129 "while getting disk information: %s", msg)
3130 if msg:
3131 # No data from this node
3132 data = len(disks) * [(False, msg)]
3133 else:
3134 data = []
3135 for idx, i in enumerate(nres.payload):
3136 if isinstance(i, (tuple, list)) and len(i) == 2:
3137 data.append(i)
3138 else:
3139 logging.warning("Invalid result from node %s, entry %d: %s",
3140 nname, idx, i)
3141 data.append((False, "Invalid result from the remote node"))
3143 for ((inst, _), status) in zip(disks, data):
3144 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3146 # Add empty entries for diskless instances.
3147 for inst in diskless_instances:
3148 assert inst not in instdisk
3151 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3152 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3153 compat.all(isinstance(s, (tuple, list)) and
3154 len(s) == 2 for s in statuses)
3155 for inst, nnames in instdisk.items()
3156 for nname, statuses in nnames.items())
3157 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
3159 return instdisk
3161 @staticmethod
3162 def _SshNodeSelector(group_uuid, all_nodes):
3163 """Create endless iterators for all potential SSH check hosts.
3166 nodes = [node for node in all_nodes
3167 if (node.group != group_uuid and
3168 not node.offline)]
3169 keyfunc = operator.attrgetter("group")
3171 return map(itertools.cycle,
3172 [sorted(map(operator.attrgetter("name"), names))
3173 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3174 keyfunc)])
3176 @classmethod
3177 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3178 """Choose which nodes should talk to which other nodes.
3180 We will make nodes contact all nodes in their group, and one node from
3181 every other group.
3183 @warning: This algorithm has a known issue if one node group is much
3184 smaller than others (e.g. just one node). In such a case all other
3185 nodes will talk to the single node.
3188 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3189 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3191 return (online_nodes,
3192 dict((name, sorted([i.next() for i in sel]))
3193 for name in online_nodes))
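# Illustrative sketch (not part of Ganeti): the group-aware SSH target
# selection above, on invented data. One endless iterator is kept per foreign
# group; cycling it spreads the per-node targets round-robin so no single
# foreign node is contacted by everyone.
def _ExampleSshTargets():
  import itertools
  foreign_groups = {"groupA": ["a1", "a2"], "groupB": ["b1"]}
  selectors = [itertools.cycle(sorted(names))
               for names in foreign_groups.values()]
  online_nodes = ["n1", "n2", "n3"]
  return dict((name, sorted(sel.next() for sel in selectors))
              for name in online_nodes)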
3195 def BuildHooksEnv(self):
3196 """Build hooks env.
3198 Cluster-Verify hooks are run in the post phase; a hook failure is logged
3199 in the verify output and makes the verification fail.
3202 env = {
3203 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3204 }
3206 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3207 for node in self.my_node_info.values())
3209 return env
3211 def BuildHooksNodes(self):
3212 """Build hooks nodes.
3215 return ([], self.my_node_names)
3217 def Exec(self, feedback_fn):
3218 """Verify integrity of the node group, performing various test on nodes.
3221 # This method has too many local variables. pylint: disable=R0914
3222 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3224 if not self.my_node_names:
3226 feedback_fn("* Empty node group, skipping verification")
3230 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3231 verbose = self.op.verbose
3232 self._feedback_fn = feedback_fn
3234 vg_name = self.cfg.GetVGName()
3235 drbd_helper = self.cfg.GetDRBDHelper()
3236 cluster = self.cfg.GetClusterInfo()
3237 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3238 hypervisors = cluster.enabled_hypervisors
3239 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3241 i_non_redundant = [] # Non redundant instances
3242 i_non_a_balanced = [] # Non auto-balanced instances
3243 i_offline = 0 # Count of offline instances
3244 n_offline = 0 # Count of offline nodes
3245 n_drained = 0 # Count of nodes being drained
3246 node_vol_should = {}
3248 # FIXME: verify OS list
3251 filemap = _ComputeAncillaryFiles(cluster, False)
3253 # do local checksums
3254 master_node = self.master_node = self.cfg.GetMasterNode()
3255 master_ip = self.cfg.GetMasterIP()
3257 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3260 if self.cfg.GetUseExternalMipScript():
3261 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3263 node_verify_param = {
3264 constants.NV_FILELIST:
3265 map(vcluster.MakeVirtualPath,
3266 utils.UniqueSequence(filename
3267 for files in filemap
3268 for filename in files)),
3269 constants.NV_NODELIST:
3270 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3271 self.all_node_info.values()),
3272 constants.NV_HYPERVISOR: hypervisors,
3273 constants.NV_HVPARAMS:
3274 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3275 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3276 for node in node_data_list
3277 if not node.offline],
3278 constants.NV_INSTANCELIST: hypervisors,
3279 constants.NV_VERSION: None,
3280 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3281 constants.NV_NODESETUP: None,
3282 constants.NV_TIME: None,
3283 constants.NV_MASTERIP: (master_node, master_ip),
3284 constants.NV_OSLIST: None,
3285 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3286 constants.NV_USERSCRIPTS: user_scripts,
3289 if vg_name is not None:
3290 node_verify_param[constants.NV_VGLIST] = None
3291 node_verify_param[constants.NV_LVLIST] = vg_name
3292 node_verify_param[constants.NV_PVLIST] = [vg_name]
3295 node_verify_param[constants.NV_DRBDLIST] = None
3296 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3298 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3299 # Load file storage paths only from master node
3300 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3303 # FIXME: this needs to be changed per node-group, not cluster-wide
3305 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3306 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3307 bridges.add(default_nicpp[constants.NIC_LINK])
3308 for instance in self.my_inst_info.values():
3309 for nic in instance.nics:
3310 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3311 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3312 bridges.add(full_nic[constants.NIC_LINK])
3315 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3317 # Build our expected cluster state
3318 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3320 vm_capable=node.vm_capable))
3321 for node in node_data_list)
3325 for node in self.all_node_info.values():
3326 path = _SupportsOob(self.cfg, node)
3327 if path and path not in oob_paths:
3328 oob_paths.append(path)
3331 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3333 for instance in self.my_inst_names:
3334 inst_config = self.my_inst_info[instance]
3335 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3338 for nname in inst_config.all_nodes:
3339 if nname not in node_image:
3340 gnode = self.NodeImage(name=nname)
3341 gnode.ghost = (nname not in self.all_node_info)
3342 node_image[nname] = gnode
3344 inst_config.MapLVsByNode(node_vol_should)
3346 pnode = inst_config.primary_node
3347 node_image[pnode].pinst.append(instance)
3349 for snode in inst_config.secondary_nodes:
3350 nimg = node_image[snode]
3351 nimg.sinst.append(instance)
3352 if pnode not in nimg.sbp:
3353 nimg.sbp[pnode] = []
3354 nimg.sbp[pnode].append(instance)
3356 # At this point, we have the in-memory data structures complete,
3357 # except for the runtime information, which we'll gather next
3359 # Due to the way our RPC system works, exact response times cannot be
3360 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3361 # time before and after executing the request, we can at least have a time
3363 nvinfo_starttime = time.time()
3364 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3366 self.cfg.GetClusterName())
3367 nvinfo_endtime = time.time()
3369 if self.extra_lv_nodes and vg_name is not None:
3371 self.rpc.call_node_verify(self.extra_lv_nodes,
3372 {constants.NV_LVLIST: vg_name},
3373 self.cfg.GetClusterName())
3375 extra_lv_nvinfo = {}
3377 all_drbd_map = self.cfg.ComputeDRBDMap()
3379 feedback_fn("* Gathering disk information (%s nodes)" %
3380 len(self.my_node_names))
3381 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3384 feedback_fn("* Verifying configuration file consistency")
3386 # If not all nodes are being checked, we need to make sure the master node
3387 # and a non-checked vm_capable node are in the list.
3388 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3390 vf_nvinfo = all_nvinfo.copy()
3391 vf_node_info = list(self.my_node_info.values())
3392 additional_nodes = []
3393 if master_node not in self.my_node_info:
3394 additional_nodes.append(master_node)
3395 vf_node_info.append(self.all_node_info[master_node])
3396 # Add the first vm_capable node we find which is not included,
3397 # excluding the master node (which we already have)
3398 for node in absent_nodes:
3399 nodeinfo = self.all_node_info[node]
3400 if (nodeinfo.vm_capable and not nodeinfo.offline and
3401 node != master_node):
3402 additional_nodes.append(node)
3403 vf_node_info.append(self.all_node_info[node])
3405 key = constants.NV_FILELIST
3406 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3407 {key: node_verify_param[key]},
3408 self.cfg.GetClusterName()))
3410 vf_nvinfo = all_nvinfo
3411 vf_node_info = self.my_node_info.values()
3413 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3415 feedback_fn("* Verifying node status")
3419 for node_i in node_data_list:
3420 node = node_i.name
3421 nimg = node_image[node]
3425 feedback_fn("* Skipping offline node %s" % (node,))
3429 if node == master_node:
3430 ntype = "master"
3431 elif node_i.master_candidate:
3432 ntype = "master candidate"
3433 elif node_i.drained:
3434 ntype = "drained"
3435 n_drained += 1
3436 else:
3437 ntype = "regular"
3438 if verbose:
3439 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3441 msg = all_nvinfo[node].fail_msg
3442 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3443 msg)
3444 if msg:
3445 nimg.rpc_fail = True
3446 continue
3448 nresult = all_nvinfo[node].payload
3450 nimg.call_ok = self._VerifyNode(node_i, nresult)
3451 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3452 self._VerifyNodeNetwork(node_i, nresult)
3453 self._VerifyNodeUserScripts(node_i, nresult)
3454 self._VerifyOob(node_i, nresult)
3455 self._VerifyFileStoragePaths(node_i, nresult,
3456 node == master_node)
3459 self._VerifyNodeLVM(node_i, nresult, vg_name)
3460 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3463 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3464 self._UpdateNodeInstances(node_i, nresult, nimg)
3465 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3466 self._UpdateNodeOS(node_i, nresult, nimg)
3468 if not nimg.os_fail:
3469 if refos_img is None:
3471 self._VerifyNodeOS(node_i, nimg, refos_img)
3472 self._VerifyNodeBridges(node_i, nresult, bridges)
3474 # Check whether all running instances are primary for the node. (This
3475 # can no longer be done from _VerifyInstance below, since some of the
3476 # wrong instances could be from other node groups.)
3477 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3479 for inst in non_primary_inst:
3480 test = inst in self.all_inst_info
3481 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3482 "instance should not run on node %s", node_i.name)
3483 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3484 "node is running unknown instance %s", inst)
3486 for node, result in extra_lv_nvinfo.items():
3487 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3488 node_image[node], vg_name)
3490 feedback_fn("* Verifying instance status")
3491 for instance in self.my_inst_names:
3493 feedback_fn("* Verifying instance %s" % instance)
3494 inst_config = self.my_inst_info[instance]
3495 self._VerifyInstance(instance, inst_config, node_image,
3497 inst_nodes_offline = []
3499 pnode = inst_config.primary_node
3500 pnode_img = node_image[pnode]
3501 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3502 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3503 " primary node failed", instance)
3505 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3507 constants.CV_EINSTANCEBADNODE, instance,
3508 "instance is marked as running and lives on offline node %s",
3509 inst_config.primary_node)
3511 # If the instance is non-redundant we cannot survive losing its primary
3512 # node, so we are not N+1 compliant.
3513 if inst_config.disk_template not in constants.DTS_MIRRORED:
3514 i_non_redundant.append(instance)
3516 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3517 constants.CV_EINSTANCELAYOUT,
3518 instance, "instance has multiple secondary nodes: %s",
3519 utils.CommaJoin(inst_config.secondary_nodes),
3520 code=self.ETYPE_WARNING)
3522 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3523 pnode = inst_config.primary_node
3524 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3525 instance_groups = {}
3527 for node in instance_nodes:
3528 instance_groups.setdefault(self.all_node_info[node].group,
3532 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3533 # Sort so that we always list the primary node first.
3534 for group, nodes in sorted(instance_groups.items(),
3535 key=lambda (_, nodes): pnode in nodes,
3538 self._ErrorIf(len(instance_groups) > 1,
3539 constants.CV_EINSTANCESPLITGROUPS,
3540 instance, "instance has primary and secondary nodes in"
3541 " different groups: %s", utils.CommaJoin(pretty_list),
3542 code=self.ETYPE_WARNING)
3544 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3545 i_non_a_balanced.append(instance)
3547 for snode in inst_config.secondary_nodes:
3548 s_img = node_image[snode]
3549 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3550 snode, "instance %s, connection to secondary node failed",
3554 inst_nodes_offline.append(snode)
3556 # warn that the instance lives on offline nodes
3557 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3558 "instance has offline secondary node(s) %s",
3559 utils.CommaJoin(inst_nodes_offline))
3560 # ... or ghost/non-vm_capable nodes
3561 for node in inst_config.all_nodes:
3562 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3563 instance, "instance lives on ghost node %s", node)
3564 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3565 instance, "instance lives on non-vm_capable node %s", node)
3567 feedback_fn("* Verifying orphan volumes")
3568 reserved = utils.FieldSet(*cluster.reserved_lvs)
3570 # We will get spurious "unknown volume" warnings if any node of this group
3571 # is secondary for an instance whose primary is in another group. To avoid
3572 # them, we find these instances and add their volumes to node_vol_should.
3573 for inst in self.all_inst_info.values():
3574 for secondary in inst.secondary_nodes:
3575 if (secondary in self.my_node_info
3576 and inst.name not in self.my_inst_info):
3577 inst.MapLVsByNode(node_vol_should)
3580 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3582 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3583 feedback_fn("* Verifying N+1 Memory redundancy")
3584 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3586 feedback_fn("* Other Notes")
3588 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3589 % len(i_non_redundant))
3591 if i_non_a_balanced:
3592 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3593 % len(i_non_a_balanced))
3596 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3599 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3602 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3606 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3607 """Analyze the post-hooks' result
3609 This method analyses the hook result, handles it, and sends some
3610 nicely-formatted feedback back to the user.
3612 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3613 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3614 @param hooks_results: the results of the multi-node hooks rpc call
3615 @param feedback_fn: function used to send feedback back to the caller
3616 @param lu_result: previous Exec result
3617 @return: the new Exec result, based on the previous result
3621 # We only really run POST phase hooks, only for non-empty groups,
3622 # and are only interested in their results
3623 if not self.my_node_names:
3626 elif phase == constants.HOOKS_PHASE_POST:
3627 # Used to change hooks' output to proper indentation
3628 feedback_fn("* Hooks Results")
3629 assert hooks_results, "invalid result from hooks"
3631 for node_name in hooks_results:
3632 res = hooks_results[node_name]
3634 test = msg and not res.offline
3635 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3636 "Communication failure in hooks execution: %s", msg)
3637 if res.offline or msg:
3638 # No need to investigate payload if node is offline or gave
3641 for script, hkr, output in res.payload:
3642 test = hkr == constants.HKR_FAIL
3643 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3644 "Script %s failed, output:", script)
3645 if test:
3646 output = self._HOOKS_INDENT_RE.sub(" ", output)
3647 feedback_fn("%s" % output)
3648 lu_result = False
3650 return lu_result
3653 class LUClusterVerifyDisks(NoHooksLU):
3654 """Verifies the cluster disks status.
3659 def ExpandNames(self):
3660 self.share_locks = _ShareAll()
3661 self.needed_locks = {
3662 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3665 def Exec(self, feedback_fn):
3666 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3668 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3669 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3670 for group in group_names])
3673 class LUGroupVerifyDisks(NoHooksLU):
3674 """Verifies the status of all disks in a node group.
3679 def ExpandNames(self):
3680 # Raises errors.OpPrereqError on its own if group can't be found
3681 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3683 self.share_locks = _ShareAll()
3684 self.needed_locks = {
3685 locking.LEVEL_INSTANCE: [],
3686 locking.LEVEL_NODEGROUP: [],
3687 locking.LEVEL_NODE: [],
3689 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3690 # starts one instance of this opcode for every group, which means all
3691 # nodes will be locked for a short amount of time, so it's better to
3692 # acquire the node allocation lock as well.
3693 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3696 def DeclareLocks(self, level):
3697 if level == locking.LEVEL_INSTANCE:
3698 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3700 # Lock instances optimistically, needs verification once node and group
3701 # locks have been acquired
3702 self.needed_locks[locking.LEVEL_INSTANCE] = \
3703 self.cfg.GetNodeGroupInstances(self.group_uuid)
3705 elif level == locking.LEVEL_NODEGROUP:
3706 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3708 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3709 set([self.group_uuid] +
3710 # Lock all groups used by instances optimistically; this requires
3711 # going via the node before it's locked, requiring verification
3714 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3715 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3717 elif level == locking.LEVEL_NODE:
3718 # This will only lock the nodes in the group to be verified which contain
3719 # actual instances
3720 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3721 self._LockInstancesNodes()
3723 # Lock all nodes in group to be verified
3724 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3725 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3726 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3728 def CheckPrereq(self):
3729 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3730 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3731 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3733 assert self.group_uuid in owned_groups
3735 # Check if locked instances are still correct
3736 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3738 # Get instance information
3739 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3741 # Check if node groups for locked instances are still correct
3742 _CheckInstancesNodeGroups(self.cfg, self.instances,
3743 owned_groups, owned_nodes, self.group_uuid)
3745 def Exec(self, feedback_fn):
3746 """Verify integrity of cluster disks.
3748 @rtype: tuple of three items
3749 @return: a tuple of (dict of node-to-node_error, list of instances
3750 which need activate-disks, dict of instance: (node, volume) for
3751 missing volumes
3753 """
3754 res_nodes = {}
3755 res_instances = set()
3756 res_missing = {}
3758 nv_dict = _MapInstanceDisksToNodes(
3759 [inst for inst in self.instances.values()
3760 if inst.admin_state == constants.ADMINST_UP])
3763 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3764 set(self.cfg.GetVmCapableNodeList()))
3766 node_lvs = self.rpc.call_lv_list(nodes, [])
3768 for (node, node_res) in node_lvs.items():
3769 if node_res.offline:
3770 continue
3772 msg = node_res.fail_msg
3773 if msg:
3774 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3775 res_nodes[node] = msg
3776 continue
3778 for lv_name, (_, _, lv_online) in node_res.payload.items():
3779 inst = nv_dict.pop((node, lv_name), None)
3780 if not (lv_online or inst is None):
3781 res_instances.add(inst)
3783 # any leftover items in nv_dict are missing LVs, let's arrange the data
3784 # better
3785 for key, inst in nv_dict.iteritems():
3786 res_missing.setdefault(inst, []).append(list(key))
3788 return (res_nodes, list(res_instances), res_missing)
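# Illustrative sketch of the bookkeeping in LUGroupVerifyDisks.Exec above,
# without RPC: reported LVs are popped from the (node, lv_name) -> instance
# map, offline LVs flag their instance, and whatever is left over is a
# missing volume. All names below are local to the example.
def _ExampleCollectDiskStatus(nv_dict, reported):
  """@param nv_dict: dict of (node, lv_name) -> instance name
  @param reported: dict of (node, lv_name) -> boolean online flag
  @return: (set of instances needing activate-disks,
            dict of instance -> list of missing [node, lv_name] pairs)

  """
  need_activate = set()
  for (key, lv_online) in reported.items():
    inst = nv_dict.pop(key, None)
    if inst is not None and not lv_online:
      need_activate.add(inst)
  missing = {}
  for (key, inst) in nv_dict.items():
    missing.setdefault(inst, []).append(list(key))
  return (need_activate, missing)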
3791 class LUClusterRepairDiskSizes(NoHooksLU):
3792 """Verifies the cluster disks sizes.
3797 def ExpandNames(self):
3798 if self.op.instances:
3799 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3800 # Not getting the node allocation lock as only a specific set of
3801 # instances (and their nodes) is going to be acquired
3802 self.needed_locks = {
3803 locking.LEVEL_NODE_RES: [],
3804 locking.LEVEL_INSTANCE: self.wanted_names,
3805 }
3806 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3807 else:
3808 self.wanted_names = None
3809 self.needed_locks = {
3810 locking.LEVEL_NODE_RES: locking.ALL_SET,
3811 locking.LEVEL_INSTANCE: locking.ALL_SET,
3813 # This opcode acquires the node locks for all instances
3814 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3817 self.share_locks = {
3818 locking.LEVEL_NODE_RES: 1,
3819 locking.LEVEL_INSTANCE: 0,
3820 locking.LEVEL_NODE_ALLOC: 1,
3823 def DeclareLocks(self, level):
3824 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3825 self._LockInstancesNodes(primary_only=True, level=level)
3827 def CheckPrereq(self):
3828 """Check prerequisites.
3830 This only checks the optional instance list against the existing names.
3833 if self.wanted_names is None:
3834 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3836 self.wanted_instances = \
3837 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3839 def _EnsureChildSizes(self, disk):
3840 """Ensure children of the disk have the needed disk size.
3842 This is valid mainly for DRBD8 and fixes an issue where the
3843 children have a smaller disk size.
3845 @param disk: an L{ganeti.objects.Disk} object
3848 if disk.dev_type == constants.LD_DRBD8:
3849 assert disk.children, "Empty children for DRBD8?"
3850 fchild = disk.children[0]
3851 mismatch = fchild.size < disk.size
3852 if mismatch:
3853 self.LogInfo("Child disk has size %d, parent %d, fixing",
3854 fchild.size, disk.size)
3855 fchild.size = disk.size
3857 # and we recurse on this child only, not on the metadev
3858 return self._EnsureChildSizes(fchild) or mismatch
3862 def Exec(self, feedback_fn):
3863 """Verify the size of cluster disks.
3866 # TODO: check child disks too
3867 # TODO: check differences in size between primary/secondary nodes
3868 per_node_disks = {}
3869 for instance in self.wanted_instances:
3870 pnode = instance.primary_node
3871 if pnode not in per_node_disks:
3872 per_node_disks[pnode] = []
3873 for idx, disk in enumerate(instance.disks):
3874 per_node_disks[pnode].append((instance, idx, disk))
3876 assert not (frozenset(per_node_disks.keys()) -
3877 self.owned_locks(locking.LEVEL_NODE_RES)), \
3878 "Not owning correct locks"
3879 assert not self.owned_locks(locking.LEVEL_NODE)
3881 changed = []
3882 for node, dskl in per_node_disks.items():
3883 newl = [v[2].Copy() for v in dskl]
3884 for dsk in newl:
3885 self.cfg.SetDiskID(dsk, node)
3886 result = self.rpc.call_blockdev_getsize(node, newl)
3887 if result.fail_msg:
3888 self.LogWarning("Failure in blockdev_getsize call to node"
3889 " %s, ignoring", node)
3890 continue
3891 if len(result.payload) != len(dskl):
3892 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3893 " result.payload=%s", node, len(dskl), result.payload)
3894 self.LogWarning("Invalid result from node %s, ignoring node results",
3895 node)
3896 continue
3897 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3898 if size is None:
3899 self.LogWarning("Disk %d of instance %s did not return size"
3900 " information, ignoring", idx, instance.name)
3901 continue
3902 if not isinstance(size, (int, long)):
3903 self.LogWarning("Disk %d of instance %s did not return valid"
3904 " size information, ignoring", idx, instance.name)
3905 continue
3906 size = size >> 20
3907 if size != disk.size:
3908 self.LogInfo("Disk %d of instance %s has mismatched size,"
3909 " correcting: recorded %d, actual %d", idx,
3910 instance.name, disk.size, size)
3911 disk.size = size
3912 self.cfg.Update(instance, feedback_fn)
3913 changed.append((instance.name, idx, size))
3914 if self._EnsureChildSizes(disk):
3915 self.cfg.Update(instance, feedback_fn)
3916 changed.append((instance.name, idx, disk.size))
3918 return changed
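# Illustrative sketch of the _EnsureChildSizes fix-up above, over plain
# dicts with "size" and "children" keys instead of objects.Disk: the first
# child is grown to the parent's size and the fix recurses on it, returning
# whether anything had to be changed.
def _ExampleEnsureChildSizes(disk):
  """Returns True if any child size had to be corrected."""
  if not disk.get("children"):
    return False
  fchild = disk["children"][0]
  mismatch = fchild["size"] < disk["size"]
  if mismatch:
    fchild["size"] = disk["size"]
  # recurse on the data child only, mirroring the DRBD8 case above
  return _ExampleEnsureChildSizes(fchild) or mismatch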
3920 class LUClusterRename(LogicalUnit):
3921 """Rename the cluster.
3924 HPATH = "cluster-rename"
3925 HTYPE = constants.HTYPE_CLUSTER
3927 def BuildHooksEnv(self):
3931 return {
3932 "OP_TARGET": self.cfg.GetClusterName(),
3933 "NEW_NAME": self.op.name,
3934 }
3936 def BuildHooksNodes(self):
3937 """Build hooks nodes.
3940 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3942 def CheckPrereq(self):
3943 """Verify that the passed name is a valid one.
3946 hostname = netutils.GetHostname(name=self.op.name,
3947 family=self.cfg.GetPrimaryIPFamily())
3949 new_name = hostname.name
3950 self.ip = new_ip = hostname.ip
3951 old_name = self.cfg.GetClusterName()
3952 old_ip = self.cfg.GetMasterIP()
3953 if new_name == old_name and new_ip == old_ip:
3954 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3955 " cluster has changed",
3956 errors.ECODE_INVAL)
3957 if new_ip != old_ip:
3958 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3959 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3960 " reachable on the network" %
3961 new_ip, errors.ECODE_NOTUNIQUE)
3963 self.op.name = new_name
3965 def Exec(self, feedback_fn):
3966 """Rename the cluster.
3969 clustername = self.op.name
3970 new_ip = self.ip
3972 # shutdown the master IP
3973 master_params = self.cfg.GetMasterNetworkParameters()
3974 ems = self.cfg.GetUseExternalMipScript()
3975 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3976 master_params, ems)
3977 result.Raise("Could not disable the master role")
3979 try:
3980 cluster = self.cfg.GetClusterInfo()
3981 cluster.cluster_name = clustername
3982 cluster.master_ip = new_ip
3983 self.cfg.Update(cluster, feedback_fn)
3985 # update the known hosts file
3986 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3987 node_list = self.cfg.GetOnlineNodeList()
3988 try:
3989 node_list.remove(master_params.name)
3990 except ValueError:
3991 pass
3992 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3993 finally:
3994 master_params.ip = new_ip
3995 result = self.rpc.call_node_activate_master_ip(master_params.name,
3996 master_params, ems)
3997 msg = result.fail_msg
3998 if msg:
3999 self.LogWarning("Could not re-enable the master role on"
4000 " the master, please restart manually: %s", msg)
4002 return clustername
4005 def _ValidateNetmask(cfg, netmask):
4006 """Checks if a netmask is valid.
4008 @type cfg: L{config.ConfigWriter}
4009 @param cfg: The cluster configuration
4011 @param netmask: the netmask to be verified
4012 @raise errors.OpPrereqError: if the validation fails
4015 ip_family = cfg.GetPrimaryIPFamily()
4016 try:
4017 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4018 except errors.ProgrammerError:
4019 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4020 ip_family, errors.ECODE_INVAL)
4021 if not ipcls.ValidateNetmask(netmask):
4022 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4023 (netmask), errors.ECODE_INVAL)
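# Illustrative sketch of what the CIDR netmask check above amounts to when
# the netmask is a plain prefix length; the real validation is delegated to
# the netutils.IPAddress class matching the cluster's IP family, so the
# bounds below are only the usual IPv4/IPv6 ones.
def _ExampleValidPrefixLength(netmask, ipv6=False):
  """Returns True if C{netmask} is a usable prefix length."""
  max_len = 128 if ipv6 else 32
  return isinstance(netmask, int) and 0 < netmask <= max_len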
4026 class LUClusterSetParams(LogicalUnit):
4027 """Change the parameters of the cluster.
4030 HPATH = "cluster-modify"
4031 HTYPE = constants.HTYPE_CLUSTER
4034 def CheckArguments(self):
4038 if self.op.uid_pool:
4039 uidpool.CheckUidPool(self.op.uid_pool)
4041 if self.op.add_uids:
4042 uidpool.CheckUidPool(self.op.add_uids)
4044 if self.op.remove_uids:
4045 uidpool.CheckUidPool(self.op.remove_uids)
4047 if self.op.master_netmask is not None:
4048 _ValidateNetmask(self.cfg, self.op.master_netmask)
4050 if self.op.diskparams:
4051 for dt_params in self.op.diskparams.values():
4052 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4053 try:
4054 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4055 except errors.OpPrereqError, err:
4056 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
4059 def ExpandNames(self):
4060 # FIXME: in the future maybe other cluster params won't require checking on
4061 # all nodes to be modified.
4062 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4063 # resource locks the right thing, shouldn't it be the BGL instead?
4064 self.needed_locks = {
4065 locking.LEVEL_NODE: locking.ALL_SET,
4066 locking.LEVEL_INSTANCE: locking.ALL_SET,
4067 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4068 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4070 self.share_locks = _ShareAll()
4072 def BuildHooksEnv(self):
4076 return {
4077 "OP_TARGET": self.cfg.GetClusterName(),
4078 "NEW_VG_NAME": self.op.vg_name,
4079 }
4078 "NEW_VG_NAME": self.op.vg_name,
4081 def BuildHooksNodes(self):
4082 """Build hooks nodes.
4085 mn = self.cfg.GetMasterNode()
4086 return ([mn], [mn])
4088 def CheckPrereq(self):
4089 """Check prerequisites.
4091 This checks that the given parameters don't conflict and
4092 that the given volume group is valid.
4095 if self.op.vg_name is not None and not self.op.vg_name:
4096 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4097 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4098 " instances exist", errors.ECODE_INVAL)
4100 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4101 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4102 raise errors.OpPrereqError("Cannot disable drbd helper while"
4103 " drbd-based instances exist",
4104 errors.ECODE_INVAL)
4106 node_list = self.owned_locks(locking.LEVEL_NODE)
4108 # if vg_name is not None, check the given volume group on all nodes
4109 if self.op.vg_name:
4110 vglist = self.rpc.call_vg_list(node_list)
4111 for node in node_list:
4112 msg = vglist[node].fail_msg
4113 if msg:
4114 # ignoring down node
4115 self.LogWarning("Error while gathering data on node %s"
4116 " (ignoring node): %s", node, msg)
4117 continue
4118 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4119 self.op.vg_name,
4120 constants.MIN_VG_SIZE)
4121 if vgstatus:
4122 raise errors.OpPrereqError("Error on node '%s': %s" %
4123 (node, vgstatus), errors.ECODE_ENVIRON)
4125 if self.op.drbd_helper:
4126 # checks given drbd helper on all nodes
4127 helpers = self.rpc.call_drbd_helper(node_list)
4128 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4129 if ninfo.offline:
4130 self.LogInfo("Not checking drbd helper on offline node %s", node)
4131 continue
4132 msg = helpers[node].fail_msg
4133 if msg:
4134 raise errors.OpPrereqError("Error checking drbd helper on node"
4135 " '%s': %s" % (node, msg),
4136 errors.ECODE_ENVIRON)
4137 node_helper = helpers[node].payload
4138 if node_helper != self.op.drbd_helper:
4139 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4140 (node, node_helper), errors.ECODE_ENVIRON)
4142 self.cluster = cluster = self.cfg.GetClusterInfo()
4143 # validate params changes
4144 if self.op.beparams:
4145 objects.UpgradeBeParams(self.op.beparams)
4146 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4147 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4149 if self.op.ndparams:
4150 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4151 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4153 # TODO: we need a more general way to handle resetting
4154 # cluster-level parameters to default values
4155 if self.new_ndparams["oob_program"] == "":
4156 self.new_ndparams["oob_program"] = \
4157 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4159 if self.op.hv_state:
4160 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4161 self.cluster.hv_state_static)
4162 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4163 for hv, values in new_hv_state.items())
4165 if self.op.disk_state:
4166 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4167 self.cluster.disk_state_static)
4168 self.new_disk_state = \
4169 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4170 for name, values in svalues.items()))
4171 for storage, svalues in new_disk_state.items())
4173 if self.op.ipolicy:
4174 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4175 group_policy=False)
4177 all_instances = self.cfg.GetAllInstancesInfo().values()
4178 violations = set()
4179 for group in self.cfg.GetAllNodeGroupsInfo().values():
4180 instances = frozenset([inst for inst in all_instances
4181 if compat.any(node in group.members
4182 for node in inst.all_nodes)])
4183 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4184 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4185 new = _ComputeNewInstanceViolations(ipol,
4186 new_ipolicy, instances)
4188 violations.update(new)
4190 if violations:
4191 self.LogWarning("After the ipolicy change the following instances"
4192 " violate them: %s",
4193 utils.CommaJoin(utils.NiceSort(violations)))
4195 if self.op.nicparams:
4196 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4197 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4198 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4200 nic_errors = []
4201 # check all instances for consistency
4202 for instance in self.cfg.GetAllInstancesInfo().values():
4203 for nic_idx, nic in enumerate(instance.nics):
4204 params_copy = copy.deepcopy(nic.nicparams)
4205 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4207 # check parameter syntax
4209 objects.NIC.CheckParameterSyntax(params_filled)
4210 except errors.ConfigurationError, err:
4211 nic_errors.append("Instance %s, nic/%d: %s" %
4212 (instance.name, nic_idx, err))
4214 # if we're moving instances to routed, check that they have an ip
4215 target_mode = params_filled[constants.NIC_MODE]
4216 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4217 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4218 " address" % (instance.name, nic_idx))
4219 if nic_errors:
4220 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4221 "\n".join(nic_errors), errors.ECODE_INVAL)
4223 # hypervisor list/parameters
4224 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4225 if self.op.hvparams:
4226 for hv_name, hv_dict in self.op.hvparams.items():
4227 if hv_name not in self.new_hvparams:
4228 self.new_hvparams[hv_name] = hv_dict
4229 else:
4230 self.new_hvparams[hv_name].update(hv_dict)
4232 # disk template parameters
4233 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4234 if self.op.diskparams:
4235 for dt_name, dt_params in self.op.diskparams.items():
4236 if dt_name not in self.new_diskparams:
4237 self.new_diskparams[dt_name] = dt_params
4238 else:
4239 self.new_diskparams[dt_name].update(dt_params)
4241 # os hypervisor parameters
4242 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4243 if self.op.os_hvp:
4244 for os_name, hvs in self.op.os_hvp.items():
4245 if os_name not in self.new_os_hvp:
4246 self.new_os_hvp[os_name] = hvs
4247 else:
4248 for hv_name, hv_dict in hvs.items():
4249 if hv_name not in self.new_os_hvp[os_name]:
4250 self.new_os_hvp[os_name][hv_name] = hv_dict
4251 else:
4252 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4255 self.new_osp = objects.FillDict(cluster.osparams, {})
4256 if self.op.osparams:
4257 for os_name, osp in self.op.osparams.items():
4258 if os_name not in self.new_osp:
4259 self.new_osp[os_name] = {}
4261 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4262 use_none=True)
4264 if not self.new_osp[os_name]:
4265 # we removed all parameters
4266 del self.new_osp[os_name]
4268 # check the parameter validity (remote check)
4269 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4270 os_name, self.new_osp[os_name])
4272 # changes to the hypervisor list
4273 if self.op.enabled_hypervisors is not None:
4274 self.hv_list = self.op.enabled_hypervisors
4275 for hv in self.hv_list:
4276 # if the hypervisor doesn't already exist in the cluster
4277 # hvparams, we initialize it to empty, and then (in both
4278 # cases) we make sure to fill the defaults, as we might not
4279 # have a complete defaults list if the hypervisor wasn't
4280 # enabled before
4281 if hv not in new_hvp:
4282 new_hvp[hv] = {}
4283 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4284 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4285 else:
4286 self.hv_list = cluster.enabled_hypervisors
4288 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4289 # either the enabled list has changed, or the parameters have, validate
4290 for hv_name, hv_params in self.new_hvparams.items():
4291 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4292 (self.op.enabled_hypervisors and
4293 hv_name in self.op.enabled_hypervisors)):
4294 # either this is a new hypervisor, or its parameters have changed
4295 hv_class = hypervisor.GetHypervisor(hv_name)
4296 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4297 hv_class.CheckParameterSyntax(hv_params)
4298 _CheckHVParams(self, node_list, hv_name, hv_params)
4300 if self.op.os_hvp:
4301 # no need to check any newly-enabled hypervisors, since the
4302 # defaults have already been checked in the above code-block
4303 for os_name, os_hvp in self.new_os_hvp.items():
4304 for hv_name, hv_params in os_hvp.items():
4305 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4306 # we need to fill in the new os_hvp on top of the actual hv_p
4307 cluster_defaults = self.new_hvparams.get(hv_name, {})
4308 new_osp = objects.FillDict(cluster_defaults, hv_params)
4309 hv_class = hypervisor.GetHypervisor(hv_name)
4310 hv_class.CheckParameterSyntax(new_osp)
4311 _CheckHVParams(self, node_list, hv_name, new_osp)
4313 if self.op.default_iallocator:
4314 alloc_script = utils.FindFile(self.op.default_iallocator,
4315 constants.IALLOCATOR_SEARCH_PATH,
4316 os.path.exists)
4317 if alloc_script is None:
4318 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4319 " specified" % self.op.default_iallocator,
4320 errors.ECODE_INVAL)
4322 def Exec(self, feedback_fn):
4323 """Change the parameters of the cluster.
4326 if self.op.vg_name is not None:
4327 new_volume = self.op.vg_name
4328 if not new_volume:
4329 new_volume = None
4330 if new_volume != self.cfg.GetVGName():
4331 self.cfg.SetVGName(new_volume)
4332 else:
4333 feedback_fn("Cluster LVM configuration already in desired"
4334 " state, not changing")
4335 if self.op.drbd_helper is not None:
4336 new_helper = self.op.drbd_helper
4337 if not new_helper:
4338 new_helper = None
4339 if new_helper != self.cfg.GetDRBDHelper():
4340 self.cfg.SetDRBDHelper(new_helper)
4341 else:
4342 feedback_fn("Cluster DRBD helper already in desired state,"
4343 " not changing")
4344 if self.op.hvparams:
4345 self.cluster.hvparams = self.new_hvparams
4346 if self.op.os_hvp:
4347 self.cluster.os_hvp = self.new_os_hvp
4348 if self.op.enabled_hypervisors is not None:
4349 self.cluster.hvparams = self.new_hvparams
4350 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4351 if self.op.beparams:
4352 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4353 if self.op.nicparams:
4354 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4355 if self.op.ipolicy:
4356 self.cluster.ipolicy = self.new_ipolicy
4357 if self.op.osparams:
4358 self.cluster.osparams = self.new_osp
4359 if self.op.ndparams:
4360 self.cluster.ndparams = self.new_ndparams
4361 if self.op.diskparams:
4362 self.cluster.diskparams = self.new_diskparams
4363 if self.op.hv_state:
4364 self.cluster.hv_state_static = self.new_hv_state
4365 if self.op.disk_state:
4366 self.cluster.disk_state_static = self.new_disk_state
4368 if self.op.candidate_pool_size is not None:
4369 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4370 # we need to update the pool size here, otherwise the save will fail
4371 _AdjustCandidatePool(self, [])
4373 if self.op.maintain_node_health is not None:
4374 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4375 feedback_fn("Note: CONFD was disabled at build time, node health"
4376 " maintenance is not useful (still enabling it)")
4377 self.cluster.maintain_node_health = self.op.maintain_node_health
4379 if self.op.prealloc_wipe_disks is not None:
4380 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4382 if self.op.add_uids is not None:
4383 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4385 if self.op.remove_uids is not None:
4386 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4388 if self.op.uid_pool is not None:
4389 self.cluster.uid_pool = self.op.uid_pool
4391 if self.op.default_iallocator is not None:
4392 self.cluster.default_iallocator = self.op.default_iallocator
4394 if self.op.reserved_lvs is not None:
4395 self.cluster.reserved_lvs = self.op.reserved_lvs
4397 if self.op.use_external_mip_script is not None:
4398 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4400 def helper_os(aname, mods, desc):
4401 desc += " OS list"
4402 lst = getattr(self.cluster, aname)
4403 for key, val in mods:
4404 if key == constants.DDM_ADD:
4405 if val in lst:
4406 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4407 else:
4408 lst.append(val)
4409 elif key == constants.DDM_REMOVE:
4410 if val in lst:
4411 lst.remove(val)
4412 else:
4413 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4414 else:
4415 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4417 if self.op.hidden_os:
4418 helper_os("hidden_os", self.op.hidden_os, "hidden")
4420 if self.op.blacklisted_os:
4421 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4423 if self.op.master_netdev:
4424 master_params = self.cfg.GetMasterNetworkParameters()
4425 ems = self.cfg.GetUseExternalMipScript()
4426 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4427 self.cluster.master_netdev)
4428 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4429 master_params, ems)
4430 result.Raise("Could not disable the master ip")
4431 feedback_fn("Changing master_netdev from %s to %s" %
4432 (master_params.netdev, self.op.master_netdev))
4433 self.cluster.master_netdev = self.op.master_netdev
4435 if self.op.master_netmask:
4436 master_params = self.cfg.GetMasterNetworkParameters()
4437 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4438 result = self.rpc.call_node_change_master_netmask(master_params.name,
4439 master_params.netmask,
4440 self.op.master_netmask,
4442 master_params.netdev)
4443 if result.fail_msg:
4444 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4445 feedback_fn(msg)
4447 self.cluster.master_netmask = self.op.master_netmask
4449 self.cfg.Update(self.cluster, feedback_fn)
4451 if self.op.master_netdev:
4452 master_params = self.cfg.GetMasterNetworkParameters()
4453 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4454 self.op.master_netdev)
4455 ems = self.cfg.GetUseExternalMipScript()
4456 result = self.rpc.call_node_activate_master_ip(master_params.name,
4457 master_params, ems)
4458 if result.fail_msg:
4459 self.LogWarning("Could not re-enable the master ip on"
4460 " the master, please restart manually: %s",
4461 result.fail_msg)
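# Illustrative sketch of the two-level merge pattern CheckPrereq above uses
# for hvparams, os_hvp and diskparams: copy the current cluster-level dict,
# then add new keys and update existing ones from the requested changes.
# (Note the membership test against the merged dict, the same point fixed
# in the diskparams block above.)
def _ExampleMergeParams(current, updates):
  """Merges {name: {param: value}} dicts without mutating the inputs."""
  new = dict((name, dict(params)) for (name, params) in current.items())
  for (name, params) in updates.items():
    if name not in new:
      new[name] = dict(params)
    else:
      new[name].update(params)
  return new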
4464 def _UploadHelper(lu, nodes, fname):
4465 """Helper for uploading a file and showing warnings.
4468 if os.path.exists(fname):
4469 result = lu.rpc.call_upload_file(nodes, fname)
4470 for to_node, to_result in result.items():
4471 msg = to_result.fail_msg
4472 if msg:
4473 msg = ("Copy of file %s to node %s failed: %s" %
4474 (fname, to_node, msg))
4475 lu.LogWarning(msg)
4478 def _ComputeAncillaryFiles(cluster, redist):
4479 """Compute files external to Ganeti which need to be consistent.
4481 @type redist: boolean
4482 @param redist: Whether to include files which need to be redistributed
4485 # Compute files for all nodes
4486 files_all = set([
4487 pathutils.SSH_KNOWN_HOSTS_FILE,
4488 pathutils.CONFD_HMAC_KEY,
4489 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4490 pathutils.SPICE_CERT_FILE,
4491 pathutils.SPICE_CACERT_FILE,
4492 pathutils.RAPI_USERS_FILE,
4493 ])
4495 if redist:
4496 # we need to ship at least the RAPI certificate
4497 files_all.add(pathutils.RAPI_CERT_FILE)
4498 else:
4499 files_all.update(pathutils.ALL_CERT_FILES)
4500 files_all.update(ssconf.SimpleStore().GetFileList())
4502 if cluster.modify_etc_hosts:
4503 files_all.add(pathutils.ETC_HOSTS)
4505 if cluster.use_external_mip_script:
4506 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4508 # Files which are optional, these must:
4509 # - be present in one other category as well
4510 # - either exist or not exist on all nodes of that category (mc, vm all)
4511 files_opt = set([
4512 pathutils.RAPI_USERS_FILE,
4513 ])
4515 # Files which should only be on master candidates
4516 files_mc = set()
4518 if not redist:
4519 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4522 if (not redist and
4523 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4524 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4525 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4527 # Files which should only be on VM-capable nodes
4528 files_vm = set(
4529 filename
4530 for hv_name in cluster.enabled_hypervisors
4531 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4533 files_opt |= set(
4534 filename
4535 for hv_name in cluster.enabled_hypervisors
4536 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4538 # Filenames in each category must be unique
4539 all_files_set = files_all | files_mc | files_vm
4540 assert (len(all_files_set) ==
4541 sum(map(len, [files_all, files_mc, files_vm]))), \
4542 "Found file listed in more than one file list"
4544 # Optional files must be present in one other category
4545 assert all_files_set.issuperset(files_opt), \
4546 "Optional file not in a different required list"
4548 # This one file should never ever be re-distributed via RPC
4549 assert not (redist and
4550 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4552 return (files_all, files_opt, files_mc, files_vm)
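# Illustrative sketch of the invariants asserted in _ComputeAncillaryFiles
# above, as a standalone check over plain sets: the all/mc/vm categories
# must be disjoint, and every optional file must also appear in one of them.
def _ExampleCheckFileCategories(files_all, files_opt, files_mc, files_vm):
  """Raises AssertionError if the file categories are inconsistent."""
  union = files_all | files_mc | files_vm
  assert len(union) == len(files_all) + len(files_mc) + len(files_vm), \
    "Found file listed in more than one file list"
  assert union.issuperset(files_opt), \
    "Optional file not in a different required list"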
4555 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4556 """Distribute additional files which are part of the cluster configuration.
4558 ConfigWriter takes care of distributing the config and ssconf files, but
4559 there are more files which should be distributed to all nodes. This function
4560 makes sure those are copied.
4562 @param lu: calling logical unit
4563 @param additional_nodes: list of nodes not in the config to distribute to
4564 @type additional_vm: boolean
4565 @param additional_vm: whether the additional nodes are vm-capable or not
4568 # Gather target nodes
4569 cluster = lu.cfg.GetClusterInfo()
4570 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4572 online_nodes = lu.cfg.GetOnlineNodeList()
4573 online_set = frozenset(online_nodes)
4574 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4576 if additional_nodes is not None:
4577 online_nodes.extend(additional_nodes)
4578 if additional_vm:
4579 vm_nodes.extend(additional_nodes)
4581 # Never distribute to master node
4582 for nodelist in [online_nodes, vm_nodes]:
4583 if master_info.name in nodelist:
4584 nodelist.remove(master_info.name)
4587 (files_all, _, files_mc, files_vm) = \
4588 _ComputeAncillaryFiles(cluster, True)
4590 # Never re-distribute configuration file from here
4591 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4592 pathutils.CLUSTER_CONF_FILE in files_vm)
4593 assert not files_mc, "Master candidates not handled in this function"
4595 filemap = [
4596 (online_nodes, files_all),
4597 (vm_nodes, files_vm),
4598 ]
4601 for (node_list, files) in filemap:
4602 for fname in files:
4603 _UploadHelper(lu, node_list, fname)
4606 class LUClusterRedistConf(NoHooksLU):
4607 """Force the redistribution of cluster configuration.
4609 This is a very simple LU.
4614 def ExpandNames(self):
4615 self.needed_locks = {
4616 locking.LEVEL_NODE: locking.ALL_SET,
4617 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4619 self.share_locks = _ShareAll()
4621 def Exec(self, feedback_fn):
4622 """Redistribute the configuration.
4625 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4626 _RedistributeAncillaryFiles(self)
4629 class LUClusterActivateMasterIp(NoHooksLU):
4630 """Activate the master IP on the master node.
4633 def Exec(self, feedback_fn):
4634 """Activate the master IP.
4637 master_params = self.cfg.GetMasterNetworkParameters()
4638 ems = self.cfg.GetUseExternalMipScript()
4639 result = self.rpc.call_node_activate_master_ip(master_params.name,
4640 master_params, ems)
4641 result.Raise("Could not activate the master IP")
4644 class LUClusterDeactivateMasterIp(NoHooksLU):
4645 """Deactivate the master IP on the master node.
4648 def Exec(self, feedback_fn):
4649 """Deactivate the master IP.
4652 master_params = self.cfg.GetMasterNetworkParameters()
4653 ems = self.cfg.GetUseExternalMipScript()
4654 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4655 master_params, ems)
4656 result.Raise("Could not deactivate the master IP")
4659 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4660 """Sleep and poll for an instance's disks to sync.
4663 if not instance.disks or (disks is not None and not disks):
4664 return True
4666 disks = _ExpandCheckDisks(instance, disks)
4669 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4671 node = instance.primary_node
4673 for dev in disks:
4674 lu.cfg.SetDiskID(dev, node)
4676 # TODO: Convert to utils.Retry
4678 retries = 0
4679 degr_retries = 10 # in seconds, as we sleep 1 second each time
4680 while True:
4681 max_time = 0
4682 done = True
4683 cumul_degraded = False
4684 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4685 msg = rstats.fail_msg
4686 if msg:
4687 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4688 retries += 1
4689 if retries >= 10:
4690 raise errors.RemoteError("Can't contact node %s for mirror data,"
4691 " aborting." % node)
4692 time.sleep(6)
4693 continue
4694 rstats = rstats.payload
4696 for i, mstat in enumerate(rstats):
4697 if mstat is None:
4698 lu.LogWarning("Can't compute data for node %s/%s",
4699 node, disks[i].iv_name)
4700 continue
4702 cumul_degraded = (cumul_degraded or
4703 (mstat.is_degraded and mstat.sync_percent is None))
4704 if mstat.sync_percent is not None:
4705 done = False
4706 if mstat.estimated_time is not None:
4707 rem_time = ("%s remaining (estimated)" %
4708 utils.FormatSeconds(mstat.estimated_time))
4709 max_time = mstat.estimated_time
4710 else:
4711 rem_time = "no time estimate"
4712 lu.LogInfo("- device %s: %5.2f%% done, %s",
4713 disks[i].iv_name, mstat.sync_percent, rem_time)
4715 # if we're done but degraded, let's do a few small retries, to
4716 # make sure we see a stable and not transient situation; therefore
4717 # we force restart of the loop
4718 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4719 logging.info("Degraded disks found, %d retries left", degr_retries)
4720 degr_retries -= 1
4721 time.sleep(1)
4722 continue
4724 if done or oneshot:
4725 break
4727 time.sleep(min(60, max_time))
4729 if done:
4730 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4732 return not cumul_degraded
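# Illustrative sketch of the retry discipline in _WaitForSync above: when
# the mirror looks done but is still degraded, poll a few extra one-second
# rounds before trusting the verdict. poll_fn returns (done, degraded) and
# stands in for the blockdev_getmirrorstatus RPC; it is an assumption of
# this example.
def _ExampleWaitUntilStable(poll_fn, degr_retries=10):
  """Returns True once the device is done and not degraded."""
  import time
  while True:
    (done, degraded) = poll_fn()
    if done:
      if degraded and degr_retries > 0:
        degr_retries -= 1
        time.sleep(1)
        continue
      return not degraded
    time.sleep(5)  # the real code sleeps min(60, estimated_time)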
4735 def _BlockdevFind(lu, node, dev, instance):
4736 """Wrapper around call_blockdev_find to annotate diskparams.
4738 @param lu: A reference to the lu object
4739 @param node: The node to call out
4740 @param dev: The device to find
4741 @param instance: The instance object the device belongs to
4742 @returns The result of the rpc call
4745 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4746 return lu.rpc.call_blockdev_find(node, disk)
4749 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4750 """Wrapper around L{_CheckDiskConsistencyInner}.
4753 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4754 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4755 ldisk=ldisk)
4758 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4759 ldisk=False):
4760 """Check that mirrors are not degraded.
4762 @attention: The device has to be annotated already.
4764 The ldisk parameter, if True, will change the test from the
4765 is_degraded attribute (which represents overall non-ok status for
4766 the device(s)) to the ldisk (representing the local storage status).
4769 lu.cfg.SetDiskID(dev, node)
4771 result = True
4773 if on_primary or dev.AssembleOnSecondary():
4774 rstats = lu.rpc.call_blockdev_find(node, dev)
4775 msg = rstats.fail_msg
4776 if msg:
4777 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4778 result = False
4779 elif not rstats.payload:
4780 lu.LogWarning("Can't find disk on node %s", node)
4781 result = False
4782 else:
4783 if ldisk:
4784 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4785 else:
4786 result = result and not rstats.payload.is_degraded
4788 if dev.children:
4789 for child in dev.children:
4790 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4791 on_primary)
4793 return result
4796 class LUOobCommand(NoHooksLU):
4797 """Logical unit for OOB handling.
4801 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4803 def ExpandNames(self):
4804 """Gather locks we need.
4807 if self.op.node_names:
4808 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4809 lock_names = self.op.node_names
4810 else:
4811 lock_names = locking.ALL_SET
4813 self.needed_locks = {
4814 locking.LEVEL_NODE: lock_names,
4817 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4819 if not self.op.node_names:
4820 # Acquire node allocation lock only if all nodes are affected
4821 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4823 def CheckPrereq(self):
4824 """Check prerequisites.
4827 - the node exists in the configuration
4830 Any errors are signaled by raising errors.OpPrereqError.
4833 self.nodes = []
4834 self.master_node = self.cfg.GetMasterNode()
4836 assert self.op.power_delay >= 0.0
4838 if self.op.node_names:
4839 if (self.op.command in self._SKIP_MASTER and
4840 self.master_node in self.op.node_names):
4841 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4842 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4844 if master_oob_handler:
4845 additional_text = ("run '%s %s %s' if you want to operate on the"
4846 " master regardless") % (master_oob_handler,
4847 self.op.command,
4848 self.master_node)
4849 else:
4850 additional_text = "it does not support out-of-band operations"
4852 raise errors.OpPrereqError(("Operating on the master node %s is not"
4853 " allowed for %s; %s") %
4854 (self.master_node, self.op.command,
4855 additional_text), errors.ECODE_INVAL)
4856 else:
4857 self.op.node_names = self.cfg.GetNodeList()
4858 if self.op.command in self._SKIP_MASTER:
4859 self.op.node_names.remove(self.master_node)
4861 if self.op.command in self._SKIP_MASTER:
4862 assert self.master_node not in self.op.node_names
4864 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4865 if node is None:
4866 raise errors.OpPrereqError("Node %s not found" % node_name,
4867 errors.ECODE_NOENT)
4868 else:
4869 self.nodes.append(node)
4871 if (not self.op.ignore_status and
4872 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4873 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4874 " not marked offline") % node_name,
4875 errors.ECODE_STATE)
4877 def Exec(self, feedback_fn):
4878 """Execute OOB and return result if we expect any.
4881 master_node = self.master_node
4882 ret = []
4884 for idx, node in enumerate(utils.NiceSort(self.nodes,
4885 key=lambda node: node.name)):
4886 node_entry = [(constants.RS_NORMAL, node.name)]
4887 ret.append(node_entry)
4889 oob_program = _SupportsOob(self.cfg, node)
4891 if not oob_program:
4892 node_entry.append((constants.RS_UNAVAIL, None))
4893 continue
4895 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4896 self.op.command, oob_program, node.name)
4897 result = self.rpc.call_run_oob(master_node, oob_program,
4898 self.op.command, node.name,
4899 self.op.timeout)
4901 if result.fail_msg:
4902 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4903 node.name, result.fail_msg)
4904 node_entry.append((constants.RS_NODATA, None))
4905 else:
4906 try:
4907 self._CheckPayload(result)
4908 except errors.OpExecError, err:
4909 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4910 node.name, err)
4911 node_entry.append((constants.RS_NODATA, None))
4912 else:
4913 if self.op.command == constants.OOB_HEALTH:
4914 # For health we should log important events
4915 for item, status in result.payload:
4916 if status in [constants.OOB_STATUS_WARNING,
4917 constants.OOB_STATUS_CRITICAL]:
4918 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4919 item, node.name, status)
4921 if self.op.command == constants.OOB_POWER_ON:
4922 node.powered = True
4923 elif self.op.command == constants.OOB_POWER_OFF:
4924 node.powered = False
4925 elif self.op.command == constants.OOB_POWER_STATUS:
4926 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4927 if powered != node.powered:
4928 logging.warning(("Recorded power state (%s) of node '%s' does not"
4929 " match actual power state (%s)"), node.powered,
4930 node.name, powered)
4932 # For configuration changing commands we should update the node
4933 if self.op.command in (constants.OOB_POWER_ON,
4934 constants.OOB_POWER_OFF):
4935 self.cfg.Update(node, feedback_fn)
4937 node_entry.append((constants.RS_NORMAL, result.payload))
4939 if (self.op.command == constants.OOB_POWER_ON and
4940 idx < len(self.nodes) - 1):
4941 time.sleep(self.op.power_delay)
4943 return ret
4945 def _CheckPayload(self, result):
4946 """Checks if the payload is valid.
4948 @param result: RPC result
4949 @raises errors.OpExecError: If payload is not valid
4952 errs = []
4953 if self.op.command == constants.OOB_HEALTH:
4954 if not isinstance(result.payload, list):
4955 errs.append("command 'health' is expected to return a list but got %s" %
4956 type(result.payload))
4957 else:
4958 for item, status in result.payload:
4959 if status not in constants.OOB_STATUSES:
4960 errs.append("health item '%s' has invalid status '%s'" %
4961 (item, status))
4963 if self.op.command == constants.OOB_POWER_STATUS:
4964 if not isinstance(result.payload, dict):
4965 errs.append("power-status is expected to return a dict but got %s" %
4966 type(result.payload))
4968 if self.op.command in [
4969 constants.OOB_POWER_ON,
4970 constants.OOB_POWER_OFF,
4971 constants.OOB_POWER_CYCLE,
4973 if result.payload is not None:
4974 errs.append("%s is expected to not return payload but got '%s'" %
4975 (self.op.command, result.payload))
4977 if errs:
4978 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4979 utils.CommaJoin(errs))
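# Illustrative sketch of the payload contract checked by _CheckPayload
# above, over plain values: "health" must return a list, "power-status" a
# dict, and the power on/off/cycle commands no payload at all. The command
# spellings used here are assumptions of this example, not the constants.
def _ExampleCheckOobPayload(command, payload):
  """Returns a list of error strings; empty means the payload is valid."""
  errs = []
  if command == "health" and not isinstance(payload, list):
    errs.append("health should return a list, got %s" % type(payload))
  elif command == "power-status" and not isinstance(payload, dict):
    errs.append("power-status should return a dict, got %s" % type(payload))
  elif command in ("power-on", "power-off", "power-cycle") and \
       payload is not None:
    errs.append("%s should not return a payload, got %r" %
                (command, payload))
  return errs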
4982 class _OsQuery(_QueryBase):
4983 FIELDS = query.OS_FIELDS
4985 def ExpandNames(self, lu):
4986 # Lock all nodes in shared mode
4987 # Temporary removal of locks, should be reverted later
4988 # TODO: reintroduce locks when they are lighter-weight
4989 lu.needed_locks = {}
4990 #self.share_locks[locking.LEVEL_NODE] = 1
4991 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4993 # The following variables interact with _QueryBase._GetNames
4994 if self.names:
4995 self.wanted = self.names
4996 else:
4997 self.wanted = locking.ALL_SET
4999 self.do_locking = self.use_locking
5001 def DeclareLocks(self, lu, level):
5002 pass
5004 @staticmethod
5005 def _DiagnoseByOS(rlist):
5006 """Remaps a per-node return list into a per-os per-node dictionary
5008 @param rlist: a map with node names as keys and OS objects as values
5011 @return: a dictionary with osnames as keys and as value another
5012 map, with nodes as keys and tuples of (path, status, diagnose,
5013 variants, parameters, api_versions) as values, eg::
5015 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5016 (/srv/..., False, "invalid api")],
5017 "node2": [(/srv/..., True, "", [], [])]}
5018 """
5020 all_os = {}
5022 # we build here the list of nodes that didn't fail the RPC (at RPC
5023 # level), so that nodes with a non-responding node daemon don't
5024 # make all OSes invalid
5025 good_nodes = [node_name for node_name in rlist
5026 if not rlist[node_name].fail_msg]
5027 for node_name, nr in rlist.items():
5028 if nr.fail_msg or not nr.payload:
5029 continue
5030 for (name, path, status, diagnose, variants,
5031 params, api_versions) in nr.payload:
5032 if name not in all_os:
5033 # build a list of nodes for this os containing empty lists
5034 # for each node in node_list
5035 all_os[name] = {}
5036 for nname in good_nodes:
5037 all_os[name][nname] = []
5038 # convert params from [name, help] to (name, help)
5039 params = [tuple(v) for v in params]
5040 all_os[name][node_name].append((path, status, diagnose,
5041 variants, params, api_versions))
5043 return all_os
5044 def _GetQueryData(self, lu):
5045 """Computes the list of nodes and their attributes.
5048 # Locking is not used
5049 assert not (compat.any(lu.glm.is_owned(level)
5050 for level in locking.LEVELS
5051 if level != locking.LEVEL_CLUSTER) or
5052 self.do_locking or self.use_locking)
5054 valid_nodes = [node.name
5055 for node in lu.cfg.GetAllNodesInfo().values()
5056 if not node.offline and node.vm_capable]
5057 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5058 cluster = lu.cfg.GetClusterInfo()
5060 data = {}
5062 for (os_name, os_data) in pol.items():
5063 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5064 hidden=(os_name in cluster.hidden_os),
5065 blacklisted=(os_name in cluster.blacklisted_os))
5067 variants = set()
5068 parameters = set()
5069 api_versions = set()
5071 for idx, osl in enumerate(os_data.values()):
5072 info.valid = bool(info.valid and osl and osl[0][1])
5073 if not info.valid:
5074 break
5076 (node_variants, node_params, node_api) = osl[0][3:6]
5077 if idx == 0:
5078 # first entry
5079 variants.update(node_variants)
5080 parameters.update(node_params)
5081 api_versions.update(node_api)
5082 else:
5083 # Filter out inconsistent values
5084 variants.intersection_update(node_variants)
5085 parameters.intersection_update(node_params)
5086 api_versions.intersection_update(node_api)
5088 info.variants = list(variants)
5089 info.parameters = list(parameters)
5090 info.api_versions = list(api_versions)
5092 data[os_name] = info
5094 # Prepare data in requested order
5095 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5096 if name in data]
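# Illustrative sketch of the consistency filter in _GetQueryData above:
# the first node seeds the variant/parameter/API sets and every further
# node intersects them, so only values shared by all nodes survive.
def _ExampleIntersectOsData(per_node_values):
  """@param per_node_values: list of value iterables, one per node
  @return: sorted list of the values common to all nodes

  """
  common = None
  for values in per_node_values:
    if common is None:
      common = set(values)
    else:
      common.intersection_update(values)
  return sorted(common) if common else []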
5099 class LUOsDiagnose(NoHooksLU):
5100 """Logical unit for OS diagnose/query.
5105 @staticmethod
5106 def _BuildFilter(fields, names):
5107 """Builds a filter for querying OSes.
5110 name_filter = qlang.MakeSimpleFilter("name", names)
5112 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5113 # respective field is not requested
5114 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5115 for fname in ["hidden", "blacklisted"]
5116 if fname not in fields]
5117 if "valid" not in fields:
5118 status_filter.append([qlang.OP_TRUE, "valid"])
5120 if status_filter:
5121 status_filter.insert(0, qlang.OP_AND)
5122 else:
5123 status_filter = None
5125 if name_filter and status_filter:
5126 return [qlang.OP_AND, name_filter, status_filter]
5127 elif name_filter:
5128 return name_filter
5129 else:
5130 return status_filter
5132 def CheckArguments(self):
5133 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5134 self.op.output_fields, False)
5136 def ExpandNames(self):
5137 self.oq.ExpandNames(self)
5139 def Exec(self, feedback_fn):
5140 return self.oq.OldStyleQuery(self)
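# Illustrative sketch of the legacy-behaviour filter built by _BuildFilter
# above, with the qlang operators written as their conventional string
# spellings ("&" for AND, "!" for NOT, "?" for TRUE); treat those spellings
# as assumptions of this example rather than the canonical constants.
def _ExampleLegacyOsFilter(fields, name_filter=None):
  """Hides hidden/blacklisted/invalid OSes unless explicitly queried."""
  status_filter = [["!", ["?", fname]]
                   for fname in ("hidden", "blacklisted")
                   if fname not in fields]
  if "valid" not in fields:
    status_filter.append(["?", "valid"])
  if status_filter:
    status_filter.insert(0, "&")
  else:
    status_filter = None
  if name_filter and status_filter:
    return ["&", name_filter, status_filter]
  return name_filter or status_filter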
5143 class LUNodeRemove(LogicalUnit):
5144 """Logical unit for removing a node.
5147 HPATH = "node-remove"
5148 HTYPE = constants.HTYPE_NODE
5150 def BuildHooksEnv(self):
5154 return {
5155 "OP_TARGET": self.op.node_name,
5156 "NODE_NAME": self.op.node_name,
5157 }
5159 def BuildHooksNodes(self):
5160 """Build hooks nodes.
5162 This doesn't run on the target node in the pre phase as a failed
5163 node would then be impossible to remove.
5166 all_nodes = self.cfg.GetNodeList()
5167 try:
5168 all_nodes.remove(self.op.node_name)
5169 except ValueError:
5170 pass
5171 return (all_nodes, all_nodes)
5173 def CheckPrereq(self):
5174 """Check prerequisites.
5177 - the node exists in the configuration
5178 - it does not have primary or secondary instances
5179 - it's not the master
5181 Any errors are signaled by raising errors.OpPrereqError.
5184 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5185 node = self.cfg.GetNodeInfo(self.op.node_name)
5186 assert node is not None
5188 masternode = self.cfg.GetMasterNode()
5189 if node.name == masternode:
5190 raise errors.OpPrereqError("Node is the master node, failover to another"
5191 " node is required", errors.ECODE_INVAL)
5193 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5194 if node.name in instance.all_nodes:
5195 raise errors.OpPrereqError("Instance %s is still running on the node,"
5196 " please remove first" % instance_name,
5197 errors.ECODE_INVAL)
5198 self.op.node_name = node.name
5199 self.node = node
5201 def Exec(self, feedback_fn):
5202 """Removes the node from the cluster.
5205 node = self.node
5206 logging.info("Stopping the node daemon and removing configs from node %s",
5207 node.name)
5209 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5211 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5212 "Not owning BGL"
5214 # Promote nodes to master candidate as needed
5215 _AdjustCandidatePool(self, exceptions=[node.name])
5216 self.context.RemoveNode(node.name)
5218 # Run post hooks on the node before it's removed
5219 _RunPostHook(self, node.name)
5221 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5222 msg = result.fail_msg
5224 self.LogWarning("Errors encountered on the remote node while leaving"
5225 " the cluster: %s", msg)
5227 # Remove node from our /etc/hosts
5228 if self.cfg.GetClusterInfo().modify_etc_hosts:
5229 master_node = self.cfg.GetMasterNode()
5230 result = self.rpc.call_etc_hosts_modify(master_node,
5231 constants.ETC_HOSTS_REMOVE,
5232 node.name, None)
5233 result.Raise("Can't update hosts file with new host data")
5234 _RedistributeAncillaryFiles(self)
5237 class _NodeQuery(_QueryBase):
5238 FIELDS = query.NODE_FIELDS
5240 def ExpandNames(self, lu):
5241 lu.needed_locks = {}
5242 lu.share_locks = _ShareAll()
5244 if self.names:
5245 self.wanted = _GetWantedNodes(lu, self.names)
5246 else:
5247 self.wanted = locking.ALL_SET
5249 self.do_locking = (self.use_locking and
5250 query.NQ_LIVE in self.requested_data)
5252 if self.do_locking:
5253 # If any non-static field is requested we need to lock the nodes
5254 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5255 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5257 def DeclareLocks(self, lu, level):
5258 pass
5260 def _GetQueryData(self, lu):
5261 """Computes the list of nodes and their attributes.
5264 all_info = lu.cfg.GetAllNodesInfo()
5266 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5268 # Gather data as requested
5269 if query.NQ_LIVE in self.requested_data:
5270 # filter out non-vm_capable nodes
5271 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5273 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5274 [lu.cfg.GetHypervisorType()])
5275 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5276 for (name, nresult) in node_data.items()
5277 if not nresult.fail_msg and nresult.payload)
5278 else:
5279 live_data = None
5281 if query.NQ_INST in self.requested_data:
5282 node_to_primary = dict([(name, set()) for name in nodenames])
5283 node_to_secondary = dict([(name, set()) for name in nodenames])
5285 inst_data = lu.cfg.GetAllInstancesInfo()
5287 for inst in inst_data.values():
5288 if inst.primary_node in node_to_primary:
5289 node_to_primary[inst.primary_node].add(inst.name)
5290 for secnode in inst.secondary_nodes:
5291 if secnode in node_to_secondary:
5292 node_to_secondary[secnode].add(inst.name)
5293 else:
5294 node_to_primary = None
5295 node_to_secondary = None
5297 if query.NQ_OOB in self.requested_data:
5298 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5299 for name, node in all_info.iteritems())
5300 else:
5301 oob_support = None
5303 if query.NQ_GROUP in self.requested_data:
5304 groups = lu.cfg.GetAllNodeGroupsInfo()
5305 else:
5306 groups = {}
5308 return query.NodeQueryData([all_info[name] for name in nodenames],
5309 live_data, lu.cfg.GetMasterNode(),
5310 node_to_primary, node_to_secondary, groups,
5311 oob_support, lu.cfg.GetClusterInfo())
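# Illustrative sketch of the reverse maps built for NQ_INST above: from a
# flat instance list to node -> set-of-instance-names, separately for the
# primary and secondary roles. Instances here are plain tuples, an
# assumption of this example.
def _ExampleNodeInstanceMaps(nodenames, instances):
  """@param instances: iterable of (name, primary_node, [secondary_nodes])"""
  to_primary = dict((name, set()) for name in nodenames)
  to_secondary = dict((name, set()) for name in nodenames)
  for (iname, primary, secondaries) in instances:
    if primary in to_primary:
      to_primary[primary].add(iname)
    for snode in secondaries:
      if snode in to_secondary:
        to_secondary[snode].add(iname)
  return (to_primary, to_secondary)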
5314 class LUNodeQuery(NoHooksLU):
5315 """Logical unit for querying nodes.
5318 # pylint: disable=W0142
5321 def CheckArguments(self):
5322 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5323 self.op.output_fields, self.op.use_locking)
5325 def ExpandNames(self):
5326 self.nq.ExpandNames(self)
5328 def DeclareLocks(self, level):
5329 self.nq.DeclareLocks(self, level)
5331 def Exec(self, feedback_fn):
5332 return self.nq.OldStyleQuery(self)
5335 class LUNodeQueryvols(NoHooksLU):
5336 """Logical unit for getting volumes on node(s).
5340 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5341 _FIELDS_STATIC = utils.FieldSet("node")
5343 def CheckArguments(self):
5344 _CheckOutputFields(static=self._FIELDS_STATIC,
5345 dynamic=self._FIELDS_DYNAMIC,
5346 selected=self.op.output_fields)
5348 def ExpandNames(self):
5349 self.share_locks = _ShareAll()
5351 if self.op.nodes:
5352 self.needed_locks = {
5353 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5354 }
5355 else:
5356 self.needed_locks = {
5357 locking.LEVEL_NODE: locking.ALL_SET,
5358 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5361 def Exec(self, feedback_fn):
5362 """Computes the list of nodes and their attributes.
5365 nodenames = self.owned_locks(locking.LEVEL_NODE)
5366 volumes = self.rpc.call_node_volumes(nodenames)
5368 ilist = self.cfg.GetAllInstancesInfo()
5369 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5371 output = []
5372 for node in nodenames:
5373 nresult = volumes[node]
5374 if nresult.offline:
5375 continue
5376 msg = nresult.fail_msg
5377 if msg:
5378 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5379 continue
5381 node_vols = sorted(nresult.payload,
5382 key=operator.itemgetter("dev"))
5384 for vol in node_vols:
5385 node_output = []
5386 for field in self.op.output_fields:
5387 if field == "node":
5388 val = node
5389 elif field == "phys":
5390 val = vol["dev"]
5391 elif field == "vg":
5392 val = vol["vg"]
5393 elif field == "name":
5394 val = vol["name"]
5395 elif field == "size":
5396 val = int(float(vol["size"]))
5397 elif field == "instance":
5398 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5399 else:
5400 raise errors.ParameterError(field)
5401 node_output.append(str(val))
5403 output.append(node_output)
5405 return output
5408 class LUNodeQueryStorage(NoHooksLU):
5409 """Logical unit for getting information on storage units on node(s).
5412 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5415 def CheckArguments(self):
5416 _CheckOutputFields(static=self._FIELDS_STATIC,
5417 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5418 selected=self.op.output_fields)
5420 def ExpandNames(self):
5421 self.share_locks = _ShareAll()
5423 if self.op.nodes:
5424 self.needed_locks = {
5425 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5426 }
5427 else:
5428 self.needed_locks = {
5429 locking.LEVEL_NODE: locking.ALL_SET,
5430 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5433 def Exec(self, feedback_fn):
5434 """Computes the list of nodes and their attributes.
5437 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5439 # Always get name to sort by
5440 if constants.SF_NAME in self.op.output_fields:
5441 fields = self.op.output_fields[:]
5442 else:
5443 fields = [constants.SF_NAME] + self.op.output_fields
5445 # Never ask for node or type as it's only known to the LU
5446 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5447 while extra in fields:
5448 fields.remove(extra)
5450 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5451 name_idx = field_idx[constants.SF_NAME]
5453 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5454 data = self.rpc.call_storage_list(self.nodes,
5455 self.op.storage_type, st_args,
5456 self.op.name, fields)
5458 result = []
5460 for node in utils.NiceSort(self.nodes):
5461 nresult = data[node]
5462 if nresult.offline:
5463 continue
5465 msg = nresult.fail_msg
5466 if msg:
5467 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5468 continue
5470 rows = dict([(row[name_idx], row) for row in nresult.payload])
5472 for name in utils.NiceSort(rows.keys()):
5473 row = rows[name]
5474 out = []
5477 for field in self.op.output_fields:
5478 if field == constants.SF_NODE:
5479 val = node
5480 elif field == constants.SF_TYPE:
5481 val = self.op.storage_type
5482 elif field in field_idx:
5483 val = row[field_idx[field]]
5484 else:
5485 raise errors.ParameterError(field)
5487 out.append(str(val))
5489 result.append(out)
5491 return result
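# Illustrative sketch of the field-list normalisation in Exec above: make
# sure the name field is queried (it is the sort key) and strip the fields
# only the LU itself can answer, remembering each remaining field's index.
# The default field names below are assumptions of this example.
def _ExampleNormalizeStorageFields(requested, name_field="name",
                                   lu_only=("node", "type")):
  """Returns (fields to query, dict of field -> index)."""
  if name_field in requested:
    fields = list(requested)
  else:
    fields = [name_field] + list(requested)
  fields = [f for f in fields if f not in lu_only]
  return (fields, dict((name, idx) for (idx, name) in enumerate(fields)))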
5494 class _InstanceQuery(_QueryBase):
5495 FIELDS = query.INSTANCE_FIELDS
5497 def ExpandNames(self, lu):
5498 lu.needed_locks = {}
5499 lu.share_locks = _ShareAll()
5501 if self.names:
5502 self.wanted = _GetWantedInstances(lu, self.names)
5503 else:
5504 self.wanted = locking.ALL_SET
5506 self.do_locking = (self.use_locking and
5507 query.IQ_LIVE in self.requested_data)
5508 if self.do_locking:
5509 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5510 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5511 lu.needed_locks[locking.LEVEL_NODE] = []
5512 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5514 self.do_grouplocks = (self.do_locking and
5515 query.IQ_NODES in self.requested_data)
5517 def DeclareLocks(self, lu, level):
5519 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5520 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5522 # Lock all groups used by instances optimistically; this requires going
5523 # via the node before it's locked, requiring verification later on
5524 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5525 set(group_uuid
5526 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5527 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5528 elif level == locking.LEVEL_NODE:
5529 lu._LockInstancesNodes() # pylint: disable=W0212
5531 @staticmethod
5532 def _CheckGroupLocks(lu):
5533 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5534 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5536 # Check if node groups for locked instances are still correct
5537 for instance_name in owned_instances:
5538 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5540 def _GetQueryData(self, lu):
5541 """Computes the list of instances and their attributes.
5544 if self.do_grouplocks:
5545 self._CheckGroupLocks(lu)
5547 cluster = lu.cfg.GetClusterInfo()
5548 all_info = lu.cfg.GetAllInstancesInfo()
5550 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5552 instance_list = [all_info[name] for name in instance_names]
5553 nodes = frozenset(itertools.chain(*(inst.all_nodes
5554 for inst in instance_list)))
5555 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5556 bad_nodes = []
5557 offline_nodes = []
5558 wrongnode_inst = set()
5560 # Gather data as requested
5561 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5562 live_data = {}
5563 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5564 for name in nodes:
5565 result = node_data[name]
5566 if result.offline:
5567 # offline nodes will be in both lists
5568 assert result.fail_msg
5569 offline_nodes.append(name)
5570 if result.fail_msg:
5571 bad_nodes.append(name)
5572 elif result.payload:
5573 for inst in result.payload:
5574 if inst in all_info:
5575 if all_info[inst].primary_node == name:
5576 live_data.update(result.payload)
5577 else:
5578 wrongnode_inst.add(inst)
5580 # orphan instance; we don't list it here as we don't
5581 # handle this case yet in the output of instance listing
5582 logging.warning("Orphan instance '%s' found on node %s",
5583 inst, name)
5584 # else no instance is alive
5585 else:
5586 live_data = {}
5588 if query.IQ_DISKUSAGE in self.requested_data:
5589 gmi = ganeti.masterd.instance
5590 disk_usage = dict((inst.name,
5591 gmi.ComputeDiskSize(inst.disk_template,
5592 [{constants.IDISK_SIZE: disk.size}
5593 for disk in inst.disks]))
5594 for inst in instance_list)
5595 else:
5596 disk_usage = None
5598 if query.IQ_CONSOLE in self.requested_data:
5599 consinfo = {}
5600 for inst in instance_list:
5601 if inst.name in live_data:
5602 # Instance is running
5603 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5604 else:
5605 consinfo[inst.name] = None
5606 assert set(consinfo.keys()) == set(instance_names)
5607 else:
5608 consinfo = None
5610 if query.IQ_NODES in self.requested_data:
5611 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5612 instance_list)))
5613 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5614 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5615 for uuid in set(map(operator.attrgetter("group"),
5616 nodes.values()))))
5617 else:
5618 nodes = None
5619 groups = None
5621 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5622 disk_usage, offline_nodes, bad_nodes,
5623 live_data, wrongnode_inst, consinfo,
5624 nodes, groups)
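# Illustrative sketch of the node-result bucketing in _GetQueryData above:
# every node's answer lands in the offline and/or bad lists, and usable
# payloads feed the live-instance map (offline nodes appear in both lists,
# as in the real code). node_data here holds plain tuples, an assumption
# of this example.
def _ExampleBucketNodeResults(node_data):
  """@param node_data: dict of node -> (offline, fail_msg, payload)"""
  offline_nodes = []
  bad_nodes = []
  live_data = {}
  for (name, (offline, fail_msg, payload)) in node_data.items():
    if offline:
      offline_nodes.append(name)
    if fail_msg:
      bad_nodes.append(name)
    elif payload:
      live_data.update(payload)
  return (offline_nodes, bad_nodes, live_data)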
5627 class LUQuery(NoHooksLU):
5628 """Query for resources/items of a certain kind.
5631 # pylint: disable=W0142
5634 def CheckArguments(self):
5635 qcls = _GetQueryImplementation(self.op.what)
5637 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5639 def ExpandNames(self):
5640 self.impl.ExpandNames(self)
5642 def DeclareLocks(self, level):
5643 self.impl.DeclareLocks(self, level)
5645 def Exec(self, feedback_fn):
5646 return self.impl.NewStyleQuery(self)
5649 class LUQueryFields(NoHooksLU):
5650 """Query for resources/items of a certain kind.
5653 # pylint: disable=W0142
5656 def CheckArguments(self):
5657 self.qcls = _GetQueryImplementation(self.op.what)
5659 def ExpandNames(self):
5660 self.needed_locks = {}
5662 def Exec(self, feedback_fn):
5663 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5666 class LUNodeModifyStorage(NoHooksLU):
5667 """Logical unit for modifying a storage volume on a node.
5672 def CheckArguments(self):
5673 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5675 storage_type = self.op.storage_type
5677 try:
5678 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5679 except KeyError:
5680 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5681 " modified" % storage_type,
5682 errors.ECODE_INVAL)
5684 diff = set(self.op.changes.keys()) - modifiable
5685 if diff:
5686 raise errors.OpPrereqError("The following fields can not be modified for"
5687 " storage units of type '%s': %r" %
5688 (storage_type, list(diff)),
5691 def ExpandNames(self):
5692 self.needed_locks = {
5693 locking.LEVEL_NODE: self.op.node_name,
5696 def Exec(self, feedback_fn):
5697 """Computes the list of nodes and their attributes.
5700 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5701 result = self.rpc.call_storage_modify(self.op.node_name,
5702 self.op.storage_type, st_args,
5703 self.op.name, self.op.changes)
5704 result.Raise("Failed to modify storage unit '%s' on %s" %
5705 (self.op.name, self.op.node_name))
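
# Usage sketch (hypothetical values, not in the original source): only the
# fields listed in constants.MODIFIABLE_STORAGE_FIELDS for the given storage
# type may appear in "changes"; e.g. for an LVM physical volume one could
# submit changes={constants.SF_ALLOCATABLE: True} together with
# storage_type=constants.ST_LVM_PV.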


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding a node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
    - the new node is not already in the config
    - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a"
                                   " valid IPv4 address must be given as"
                                   " secondary", errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group, ndparams={})

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)

    # TODO: If we need to have multiple DnsOnlyRunner we probably should make
    # it a property on the base class.
    result = rpc.DnsOnlyRunner().call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpPrereqError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.payload),
                                 errors.ECODE_ENVIRON)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Must hold the BGL when adding a node"

    # We are adding a new node, so we assume it is powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False  # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    if self.op.hv_state:
      new_node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      new_node.disk_state_static = self.new_disk_state

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
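
# Illustrative sketch (hypothetical values): LUNodeAdd is driven by an
# OpNodeAdd opcode, roughly of the shape
#
#   opcodes.OpNodeAdd(node_name="node3.example.com", readd=False,
#                     group="default", master_capable=True, vm_capable=True)
#
# For a re-add, readd=True is passed and no group may be specified, matching
# the checks in CheckArguments above.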


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]
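  # Illustrative note (added): the flag tuple is (master_candidate, drained,
  # offline), so e.g. _F2R[(True, False, False)] == _ROLE_CANDIDATE and
  # _R2F[_ROLE_REGULAR] == (False, False, False).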

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate is False or
                         self.op.offline is True or
                         self.op.drained is True or
                         self.op.master_capable is False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,

        # Block allocations when all nodes are locked
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: self.op.node_name,
        }

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get all locks except nodes in shared mode; they are not used for anything
    # but read-only access
    self.share_locks = _ShareAll()
    self.share_locks[locking.LEVEL_NODE] = 0
    self.share_locks[locking.LEVEL_NODE_RES] = 0
    self.share_locks[locking.LEVEL_NODE_ALLOC] = 0

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable is False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
        self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion (--auto-promote or RAPI"
                                   " auto_promote=True)", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) is False and getattr(node, attr) is False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered is True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained is False or self.op.offline is False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable is False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else:  # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    # When changing the secondary ip, verify if this is a single-homed to
    # multi-homed transition or vice versa, and apply the relevant
    # restrictions.
    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip != node.primary_ip:
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from single-homed to multi-homed"
                          " cluster; all nodes will require a secondary IP"
                          " address")
        else:
          raise errors.OpPrereqError("Changing the secondary ip on a"
                                     " single-homed cluster requires the"
                                     " --force option to be passed, and the"
                                     " target node to be the master",
                                     errors.ECODE_INVAL)
      elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from multi-homed to single-homed"
                          " cluster; secondary IP addresses will have to be"
                          " removed")
        else:
          raise errors.OpPrereqError("Cannot set the secondary IP to be the"
                                     " same as the primary IP on a multi-homed"
                                     " cluster, unless the --force option is"
                                     " passed, and the target node is the"
                                     " master", errors.ECODE_INVAL)

      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          msg = ("Cannot change secondary IP address: offline node has"
                 " instances (%s) configured to use it" %
                 utils.CommaJoin(affected_instances.keys()))
          raise errors.OpPrereqError(msg, errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
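
# Illustrative note (added, hypothetical values): LUNodeSetParams.Exec returns
# feedback pairs of (modified attribute, new value as a string), e.g.
# [("master_candidate", "True"), ("secondary_ip", "192.0.2.10")].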


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Schedules the powercycle (reboot) of the node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "diskparams": cluster.diskparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.cq = _ClusterQuery(None, self.op.output_fields, False)

  def ExpandNames(self):
    self.cq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.cq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = self.cq.OldStyleQuery(self)

    assert len(result) == 1

    return result[0]


class _ClusterQuery(_QueryBase):
  FIELDS = query.CLUSTER_FIELDS

  #: Do not sort (there is only one item)
  SORT_FIELD = None

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = locking.ALL_SET
    self.do_locking = self.use_locking

    if self.do_locking:
      raise errors.OpPrereqError("Cannot use locking for cluster queries",
                                 errors.ECODE_INVAL)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the cluster attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    if query.CQ_CONFIG in self.requested_data:
      cluster = lu.cfg.GetClusterInfo()
    else:
      cluster = NotImplemented

    if query.CQ_QUEUE_DRAINED in self.requested_data:
      drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
    else:
      drain_flag = NotImplemented

    if query.CQ_WATCHER_PAUSE in self.requested_data:
      master_name = lu.cfg.GetMasterNode()

      result = lu.rpc.call_get_watcher_pause(master_name)
      result.Raise("Can't retrieve watcher pause from master node '%s'" %
                   master_name)

      watcher_pause = result.payload
    else:
      watcher_pause = NotImplemented

    return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
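
# Note (added for clarity): data that was not requested is passed to
# query.ClusterQueryData as NotImplemented rather than None, which appears to
# let the query layer distinguish "not gathered" from a legitimate None value.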


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    if self.op.wait_for_sync:
      if not _WaitForSync(self, self.instance):
        raise errors.OpExecError("Some disks of the instance are degraded!")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a pair (disks_ok, device_info), where device_info is a list
      of (host, instance_visible_name, node_visible_name) tuples with
      the mapping from node devices to instance devices, and disks_ok
      is False if the operation failed

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             False, idx)
      msg = result.fail_msg
      if msg:
        is_offline_secondary = (node in instance.secondary_nodes and
                                result.offline)
        lu.LogWarning("Could not prepare block device %s on node %s"
                      " (is_primary=False, pass=1): %s",
                      inst_disk.iv_name, node, msg)
        if not (ignore_secondaries or is_offline_secondary):
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             True, idx)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not prepare block device %s on node %s"
                      " (is_primary=True, pass=2): %s",
                      inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
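
# Illustrative note (added, hypothetical values): on success the function
# returns (True, device_info), where device_info maps node devices to
# instance devices, e.g.
#   [("node1.example.com", "disk/0", "/dev/drbd0")]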


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.LogWarning("",
                    hint=("If the message above refers to a secondary node,"
                          " you can retry the operation using '--force'"))
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks
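
# Usage note (added, illustrative call): callers pass disks=None to act on all
# of an instance's disks, or an explicit subset, e.g.
# _ExpandCheckDisks(instance, instance.disks[:1]) to act only on the first
# disk.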


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node cause the
  result to be False; otherwise such errors are ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @rtype: integer
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  (_, _, (hv_info, )) = nodeinfo[node].payload

  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
  return free_mem
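
# Usage sketch (added; mirrors the call made by LUInstanceStartup below):
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)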


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
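
# Example sketch (added, hypothetical values): req_sizes maps volume group
# names to the required space in MiB, e.g.
#
#   _CheckNodesFreeDiskPerVG(lu, ["node1", "node2"],
#                            {"xenvg": 2 * 10240, "metavg": 256})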


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, (vg_info, ), _) = info.payload
    vg_free = vg_info.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs.

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for CPU counts
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, _, (hv_info, )) = info.payload
    num_cpus = hv_info.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES:
      self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)
      bep.update(self.op.beparams)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload:  # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not shutdown instance: %s", msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict  # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  _MODIFYABLE = frozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    ]))

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    be_full = self.cfg.GetClusterInfo().FillBE(self.instance)

    # The allocator should actually run in "relocate" mode, but current
    # allocators don't support relocating all the nodes of an instance at
    # the same time. As a workaround we use "allocate" mode, but this is
    # suboptimal for two reasons:
    # - The instance name passed to the allocator is present in the list of
    #   existing instances, so there could be a conflict within the
    #   internal structures of the allocator. This doesn't happen with the
    #   current allocators, but it's a liability.
    # - The allocator counts the resources used by the instance twice: once
    #   because the instance exists already, and once because it tries to
    #   allocate a new instance.
    # The allocator could choose some of the nodes on which the instance is
    # running, but that's not a problem. If the instance nodes are broken,
    # they should already be marked as drained or offline, and hence
    # skipped by the allocator. If instance disks have been lost for other
    # reasons, then recreating the disks on the same nodes should be fine.
    disk_template = self.instance.disk_template
    spindle_use = be_full[constants.BE_SPINDLE_USE]
    req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
                                        disk_template=disk_template,
                                        tags=list(self.instance.GetTags()),
                                        os=self.instance.os,
                                        nics=[{}],
                                        vcpus=be_full[constants.BE_VCPUS],
                                        memory=be_full[constants.BE_MAXMEM],
                                        spindle_use=spindle_use,
                                        disks=[{constants.IDISK_SIZE: d.size,
                                                constants.IDISK_MODE: d.mode}
                                               for d in self.instance.disks],
                                        hypervisor=self.instance.hypervisor)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    assert req.RequiredNodes() == len(self.instance.all_nodes)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.op.nodes = ial.result
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))

  def CheckArguments(self):
    if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    # We don't want _CheckIAllocatorOrNode selecting the default iallocator
    # when neither iallocator nor nodes are specified
    if self.op.iallocator or self.op.nodes:
      _CheckIAllocatorOrNode(self, "iallocator", "nodes")

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifiable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      if self.op.iallocator:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.iallocator is not None
      assert not self.op.nodes
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock the primary group used by the instance optimistically; this
      # requires going via the node before it's locked, requiring
      # verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)

    elif level == locking.LEVEL_NODE:
      # If an allocator is used, then we lock all the nodes in the current
      # instance group, as we don't know yet which ones will be selected;
      # if we replace the nodes without using an allocator, locks are
      # already declared in ExpandNames; otherwise, we need to lock all the
      # instance nodes for disk re-creation
      if self.op.iallocator:
        assert not self.op.nodes
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1

        # Lock member nodes of the group of the primary node
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          self.needed_locks[locking.LEVEL_NODE].extend(
            self.cfg.GetNodeGroup(group_uuid).members)

        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
      elif not self.op.nodes:
        self._LockInstancesNodes(primary_only=False)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
             len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
             len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    if not self.op.iallocator:
      _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      # Node group locks are acquired only for the primary node (and only
      # when the allocator is used)
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
                               primary_only=True)

    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_PARAMS)

    if ((self.op.nodes or self.op.iallocator) and
        sorted(self.disks.keys()) != range(len(instance.disks))):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

    if self.op.iallocator:
      self._RunAllocator()
      # Release unneeded node and node resource locks
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)

    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7540 def Exec(self, feedback_fn):
7541 """Recreate the disks.
7544 instance = self.instance
7546 assert (self.owned_locks(locking.LEVEL_NODE) ==
7547 self.owned_locks(locking.LEVEL_NODE_RES))
7550 mods = [] # keeps track of needed changes
7552 for idx, disk in enumerate(instance.disks):
7554 changes = self.disks[idx]
7556 # Disk should not be recreated
7560 # update secondaries for disks, if needed
7561 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7562 # need to update the nodes and minors
7563 assert len(self.op.nodes) == 2
7564 assert len(disk.logical_id) == 6 # otherwise disk internals
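# For reference, a DRBD8 logical_id is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, shared_secret); only the
# nodes and the minors change below, the port and secret are kept.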
7566 (_, _, old_port, _, _, old_secret) = disk.logical_id
7567 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7568 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7569 new_minors[0], new_minors[1], old_secret)
7570 assert len(disk.logical_id) == len(new_id)
7574 mods.append((idx, new_id, changes))
7576 # now that we have passed all asserts above, we can apply the mods
7577 # in a single run (to avoid partial changes)
7578 for idx, new_id, changes in mods:
7579 disk = instance.disks[idx]
7580 if new_id is not None:
7581 assert disk.dev_type == constants.LD_DRBD8
7582 disk.logical_id = new_id
7584 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7585 mode=changes.get(constants.IDISK_MODE, None))
7587 # change primary node, if needed
7589 instance.primary_node = self.op.nodes[0]
7590 self.LogWarning("Changing the instance's nodes, you will have to"
7591 " remove any disks left on the older nodes manually")
7594 self.cfg.Update(instance, feedback_fn)
7596 # All touched nodes must be locked
7597 mylocks = self.owned_locks(locking.LEVEL_NODE)
7598 assert mylocks.issuperset(frozenset(instance.all_nodes))
7599 _CreateDisks(self, instance, to_skip=to_skip)
7602 class LUInstanceRename(LogicalUnit):
7603 """Rename an instance.
7606 HPATH = "instance-rename"
7607 HTYPE = constants.HTYPE_INSTANCE
7609 def CheckArguments(self):
7613 if self.op.ip_check and not self.op.name_check:
7614 # TODO: make the ip check more flexible and not depend on the name check
7615 raise errors.OpPrereqError("IP address check requires a name check",
7618 def BuildHooksEnv(self):
7621 This runs on master, primary and secondary nodes of the instance.
7624 env = _BuildInstanceHookEnvByObject(self, self.instance)
7625 env["INSTANCE_NEW_NAME"] = self.op.new_name
7628 def BuildHooksNodes(self):
7629 """Build hooks nodes.
7632 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7635 def CheckPrereq(self):
7636 """Check prerequisites.
7638 This checks that the instance is in the cluster and is not running.
7641 self.op.instance_name = _ExpandInstanceName(self.cfg,
7642 self.op.instance_name)
7643 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7644 assert instance is not None
7645 _CheckNodeOnline(self, instance.primary_node)
7646 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7647 msg="cannot rename")
7648 self.instance = instance
7650 new_name = self.op.new_name
7651 if self.op.name_check:
7652 hostname = _CheckHostnameSane(self, new_name)
7653 new_name = self.op.new_name = hostname.name
7654 if (self.op.ip_check and
7655 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7656 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7657 (hostname.ip, new_name),
7658 errors.ECODE_NOTUNIQUE)
7660 instance_list = self.cfg.GetInstanceList()
7661 if new_name in instance_list and new_name != instance.name:
7662 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7663 new_name, errors.ECODE_EXISTS)
7665 def Exec(self, feedback_fn):
7666 """Rename the instance.
7669 inst = self.instance
7670 old_name = inst.name
7672 rename_file_storage = False
7673 if (inst.disk_template in constants.DTS_FILEBASED and
7674 self.op.new_name != inst.name):
7675 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7676 rename_file_storage = True
7678 self.cfg.RenameInstance(inst.name, self.op.new_name)
7679 # Change the instance lock. This is definitely safe while we hold the BGL.
7680 # Otherwise the new lock would have to be added in acquired mode.
7682 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7683 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7684 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7686 # re-read the instance from the configuration after rename
7687 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7689 if rename_file_storage:
7690 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7691 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7692 old_file_storage_dir,
7693 new_file_storage_dir)
7694 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7695 " (but the instance has been renamed in Ganeti)" %
7696 (inst.primary_node, old_file_storage_dir,
7697 new_file_storage_dir))
7699 _StartInstanceDisks(self, inst, None)
7700 # update info on disks
7701 info = _GetInstanceInfoText(inst)
7702 for (idx, disk) in enumerate(inst.disks):
7703 for node in inst.all_nodes:
7704 self.cfg.SetDiskID(disk, node)
7705 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7707 self.LogWarning("Error setting info on node %s for disk %s: %s",
7708 node, idx, result.fail_msg)
7710 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7711 old_name, self.op.debug_level)
7712 msg = result.fail_msg
7714 msg = ("Could not run OS rename script for instance %s on node %s"
7715 " (but the instance has been renamed in Ganeti): %s" %
7716 (inst.name, inst.primary_node, msg))
7717 self.LogWarning(msg)
7719 _ShutdownInstanceDisks(self, inst)
7724 class LUInstanceRemove(LogicalUnit):
7725 """Remove an instance.
7728 HPATH = "instance-remove"
7729 HTYPE = constants.HTYPE_INSTANCE
7732 def ExpandNames(self):
7733 self._ExpandAndLockInstance()
7734 self.needed_locks[locking.LEVEL_NODE] = []
7735 self.needed_locks[locking.LEVEL_NODE_RES] = []
7736 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7738 def DeclareLocks(self, level):
7739 if level == locking.LEVEL_NODE:
7740 self._LockInstancesNodes()
7741 elif level == locking.LEVEL_NODE_RES:
7743 self.needed_locks[locking.LEVEL_NODE_RES] = \
7744 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7746 def BuildHooksEnv(self):
7749 This runs on master, primary and secondary nodes of the instance.
7752 env = _BuildInstanceHookEnvByObject(self, self.instance)
7753 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7756 def BuildHooksNodes(self):
7757 """Build hooks nodes.
7760 nl = [self.cfg.GetMasterNode()]
7761 nl_post = list(self.instance.all_nodes) + nl
7762 return (nl, nl_post)
7764 def CheckPrereq(self):
7765 """Check prerequisites.
7767 This checks that the instance is in the cluster.
7770 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7771 assert self.instance is not None, \
7772 "Cannot retrieve locked instance %s" % self.op.instance_name
7774 def Exec(self, feedback_fn):
7775 """Remove the instance.
7778 instance = self.instance
7779 logging.info("Shutting down instance %s on node %s",
7780 instance.name, instance.primary_node)
7782 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7783 self.op.shutdown_timeout)
7784 msg = result.fail_msg
7786 if self.op.ignore_failures:
7787 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7789 raise errors.OpExecError("Could not shutdown instance %s on"
7791 (instance.name, instance.primary_node, msg))
7793 assert (self.owned_locks(locking.LEVEL_NODE) ==
7794 self.owned_locks(locking.LEVEL_NODE_RES))
7795 assert not (set(instance.all_nodes) -
7796 self.owned_locks(locking.LEVEL_NODE)), \
7797 "Not owning correct locks"
7799 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7802 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7803 """Utility function to remove an instance.
7806 logging.info("Removing block devices for instance %s", instance.name)
7808 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7809 if not ignore_failures:
7810 raise errors.OpExecError("Can't remove instance's disks")
7811 feedback_fn("Warning: can't remove instance's disks")
7813 logging.info("Removing instance %s out of cluster config", instance.name)
7815 lu.cfg.RemoveInstance(instance.name)
7817 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7818 "Instance lock removal conflict"
7820 # Remove lock for the instance
7821 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7824 class LUInstanceQuery(NoHooksLU):
7825 """Logical unit for querying instances.
7828 # pylint: disable=W0142
7831 def CheckArguments(self):
7832 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7833 self.op.output_fields, self.op.use_locking)
7835 def ExpandNames(self):
7836 self.iq.ExpandNames(self)
7838 def DeclareLocks(self, level):
7839 self.iq.DeclareLocks(self, level)
7841 def Exec(self, feedback_fn):
7842 return self.iq.OldStyleQuery(self)
7845 def _ExpandNamesForMigration(lu):
7846 """Expands names for use with L{TLMigrateInstance}.
7848 @type lu: L{LogicalUnit}
7851 if lu.op.target_node is not None:
7852 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
7854 lu.needed_locks[locking.LEVEL_NODE] = []
7855 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7857 lu.needed_locks[locking.LEVEL_NODE_RES] = []
7858 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7860 # The node allocation lock is actually only needed for replicated instances
7861 # (e.g. DRBD8) and if an iallocator is used.
7862 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
7865 def _DeclareLocksForMigration(lu, level):
7866 """Declares locks for L{TLMigrateInstance}.
7868 @type lu: L{LogicalUnit}
7869 @param level: Lock level
7872 if level == locking.LEVEL_NODE_ALLOC:
7873 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
7875 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
7877 # Node locks are already declared here rather than at LEVEL_NODE as we need
7878 # the instance object anyway to declare the node allocation lock.
7879 if instance.disk_template in constants.DTS_EXT_MIRROR:
7880 if lu.op.target_node is None:
7881 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7882 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7884 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7886 del lu.recalculate_locks[locking.LEVEL_NODE]
7888 lu._LockInstancesNodes() # pylint: disable=W0212
7890 elif level == locking.LEVEL_NODE:
7891 # Node locks are declared together with the node allocation lock
7892 assert (lu.needed_locks[locking.LEVEL_NODE] or
7893 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
7895 elif level == locking.LEVEL_NODE_RES:
7897 lu.needed_locks[locking.LEVEL_NODE_RES] = \
7898 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
7901 class LUInstanceFailover(LogicalUnit):
7902 """Failover an instance.
7905 HPATH = "instance-failover"
7906 HTYPE = constants.HTYPE_INSTANCE
7909 def CheckArguments(self):
7910 """Check the arguments.
7913 self.iallocator = getattr(self.op, "iallocator", None)
7914 self.target_node = getattr(self.op, "target_node", None)
7916 def ExpandNames(self):
7917 self._ExpandAndLockInstance()
7918 _ExpandNamesForMigration(self)
7921 TLMigrateInstance(self, self.op.instance_name, False, True, False,
7922 self.op.ignore_consistency, True,
7923 self.op.shutdown_timeout, self.op.ignore_ipolicy)
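# The positional arguments map to TLMigrateInstance's signature below:
# cleanup=False, failover=True, fallback=False, then ignore_consistency,
# allow_runtime_changes=True, shutdown_timeout and ignore_ipolicy.
# (LUInstanceMigrate uses the same pattern with failover=False.)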
7925 self.tasklets = [self._migrater]
7927 def DeclareLocks(self, level):
7928 _DeclareLocksForMigration(self, level)
7930 def BuildHooksEnv(self):
7933 This runs on master, primary and secondary nodes of the instance.
7936 instance = self._migrater.instance
7937 source_node = instance.primary_node
7938 target_node = self.op.target_node
7940 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7941 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7942 "OLD_PRIMARY": source_node,
7943 "NEW_PRIMARY": target_node,
7946 if instance.disk_template in constants.DTS_INT_MIRROR:
7947 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7948 env["NEW_SECONDARY"] = source_node
7950 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7952 env.update(_BuildInstanceHookEnvByObject(self, instance))
7956 def BuildHooksNodes(self):
7957 """Build hooks nodes.
7960 instance = self._migrater.instance
7961 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7962 return (nl, nl + [instance.primary_node])
7965 class LUInstanceMigrate(LogicalUnit):
7966 """Migrate an instance.
7968 This is migration without shutting down, compared to the failover,
7969 which is done with shutdown.
7972 HPATH = "instance-migrate"
7973 HTYPE = constants.HTYPE_INSTANCE
7976 def ExpandNames(self):
7977 self._ExpandAndLockInstance()
7978 _ExpandNamesForMigration(self)
7981 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
7982 False, self.op.allow_failover, False,
7983 self.op.allow_runtime_changes,
7984 constants.DEFAULT_SHUTDOWN_TIMEOUT,
7985 self.op.ignore_ipolicy)
7987 self.tasklets = [self._migrater]
7989 def DeclareLocks(self, level):
7990 _DeclareLocksForMigration(self, level)
7992 def BuildHooksEnv(self):
7995 This runs on master, primary and secondary nodes of the instance.
7998 instance = self._migrater.instance
7999 source_node = instance.primary_node
8000 target_node = self.op.target_node
8001 env = _BuildInstanceHookEnvByObject(self, instance)
8003 "MIGRATE_LIVE": self._migrater.live,
8004 "MIGRATE_CLEANUP": self.op.cleanup,
8005 "OLD_PRIMARY": source_node,
8006 "NEW_PRIMARY": target_node,
8007 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8010 if instance.disk_template in constants.DTS_INT_MIRROR:
8011 env["OLD_SECONDARY"] = target_node
8012 env["NEW_SECONDARY"] = source_node
8014 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8018 def BuildHooksNodes(self):
8019 """Build hooks nodes.
8022 instance = self._migrater.instance
8023 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8024 return (nl, nl + [instance.primary_node])
8027 class LUInstanceMove(LogicalUnit):
8028 """Move an instance by data-copying.
8031 HPATH = "instance-move"
8032 HTYPE = constants.HTYPE_INSTANCE
8035 def ExpandNames(self):
8036 self._ExpandAndLockInstance()
8037 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8038 self.op.target_node = target_node
8039 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8040 self.needed_locks[locking.LEVEL_NODE_RES] = []
8041 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8043 def DeclareLocks(self, level):
8044 if level == locking.LEVEL_NODE:
8045 self._LockInstancesNodes(primary_only=True)
8046 elif level == locking.LEVEL_NODE_RES:
8048 self.needed_locks[locking.LEVEL_NODE_RES] = \
8049 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8051 def BuildHooksEnv(self):
8054 This runs on master, primary and secondary nodes of the instance.
8058 "TARGET_NODE": self.op.target_node,
8059 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8061 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8064 def BuildHooksNodes(self):
8065 """Build hooks nodes.
8069 self.cfg.GetMasterNode(),
8070 self.instance.primary_node,
8071 self.op.target_node,
8075 def CheckPrereq(self):
8076 """Check prerequisites.
8078 This checks that the instance is in the cluster.
8081 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8082 assert self.instance is not None, \
8083 "Cannot retrieve locked instance %s" % self.op.instance_name
8085 node = self.cfg.GetNodeInfo(self.op.target_node)
8086 assert node is not None, \
8087 "Cannot retrieve locked node %s" % self.op.target_node
8089 self.target_node = target_node = node.name
8091 if target_node == instance.primary_node:
8092 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8093 (instance.name, target_node),
8096 bep = self.cfg.GetClusterInfo().FillBE(instance)
8098 for idx, dsk in enumerate(instance.disks):
8099 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8100 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8101 " cannot copy" % idx, errors.ECODE_STATE)
8103 _CheckNodeOnline(self, target_node)
8104 _CheckNodeNotDrained(self, target_node)
8105 _CheckNodeVmCapable(self, target_node)
8106 cluster = self.cfg.GetClusterInfo()
8107 group_info = self.cfg.GetNodeGroup(node.group)
8108 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8109 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8110 ignore=self.op.ignore_ipolicy)
8112 if instance.admin_state == constants.ADMINST_UP:
8113 # check memory requirements on the target node
8114 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8115 instance.name, bep[constants.BE_MAXMEM],
8116 instance.hypervisor)
8118 self.LogInfo("Not checking memory on the secondary node as"
8119 " instance will not be started")
8121 # check bridge existence
8122 _CheckInstanceBridgesExist(self, instance, node=target_node)
8124 def Exec(self, feedback_fn):
8125 """Move an instance.
8127 The move is done by shutting it down on its present node, copying
8128 the data over (slow) and starting it on the new node.
8131 instance = self.instance
8133 source_node = instance.primary_node
8134 target_node = self.target_node
8136 self.LogInfo("Shutting down instance %s on source node %s",
8137 instance.name, source_node)
8139 assert (self.owned_locks(locking.LEVEL_NODE) ==
8140 self.owned_locks(locking.LEVEL_NODE_RES))
8142 result = self.rpc.call_instance_shutdown(source_node, instance,
8143 self.op.shutdown_timeout)
8144 msg = result.fail_msg
8146 if self.op.ignore_consistency:
8147 self.LogWarning("Could not shutdown instance %s on node %s."
8148 " Proceeding anyway. Please make sure node"
8149 " %s is down. Error details: %s",
8150 instance.name, source_node, source_node, msg)
8152 raise errors.OpExecError("Could not shutdown instance %s on"
8154 (instance.name, source_node, msg))
8156 # create the target disks
8158 _CreateDisks(self, instance, target_node=target_node)
8159 except errors.OpExecError:
8160 self.LogWarning("Device creation failed, reverting...")
8162 _RemoveDisks(self, instance, target_node=target_node)
8164 self.cfg.ReleaseDRBDMinors(instance.name)
8167 cluster_name = self.cfg.GetClusterInfo().cluster_name
8170 # activate, get path, copy the data over
8171 for idx, disk in enumerate(instance.disks):
8172 self.LogInfo("Copying data for disk %d", idx)
8173 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8174 instance.name, True, idx)
8176 self.LogWarning("Can't assemble newly created disk %d: %s",
8177 idx, result.fail_msg)
8178 errs.append(result.fail_msg)
8180 dev_path = result.payload
8181 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8182 target_node, dev_path,
8185 self.LogWarning("Can't copy data over for disk %d: %s",
8186 idx, result.fail_msg)
8187 errs.append(result.fail_msg)
8191 self.LogWarning("Some disks failed to copy, aborting")
8193 _RemoveDisks(self, instance, target_node=target_node)
8195 self.cfg.ReleaseDRBDMinors(instance.name)
8196 raise errors.OpExecError("Errors during disk copy: %s" %
8199 instance.primary_node = target_node
8200 self.cfg.Update(instance, feedback_fn)
8202 self.LogInfo("Removing the disks on the original node")
8203 _RemoveDisks(self, instance, target_node=source_node)
8205 # Only start the instance if it's marked as up
8206 if instance.admin_state == constants.ADMINST_UP:
8207 self.LogInfo("Starting instance %s on node %s",
8208 instance.name, target_node)
8210 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8211 ignore_secondaries=True)
8213 _ShutdownInstanceDisks(self, instance)
8214 raise errors.OpExecError("Can't activate the instance's disks")
8216 result = self.rpc.call_instance_start(target_node,
8217 (instance, None, None), False)
8218 msg = result.fail_msg
8220 _ShutdownInstanceDisks(self, instance)
8221 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8222 (instance.name, target_node, msg))
8225 class LUNodeMigrate(LogicalUnit):
8226 """Migrate all instances from a node.
8229 HPATH = "node-migrate"
8230 HTYPE = constants.HTYPE_NODE
8233 def CheckArguments(self):
8236 def ExpandNames(self):
8237 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8239 self.share_locks = _ShareAll()
8240 self.needed_locks = {
8241 locking.LEVEL_NODE: [self.op.node_name],
8244 def BuildHooksEnv(self):
8247 This runs on the master, the primary and all the secondaries.
8251 "NODE_NAME": self.op.node_name,
8252 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8255 def BuildHooksNodes(self):
8256 """Build hooks nodes.
8259 nl = [self.cfg.GetMasterNode()]
8262 def CheckPrereq(self):
8265 def Exec(self, feedback_fn):
8266 # Prepare jobs for migration instances
8267 allow_runtime_changes = self.op.allow_runtime_changes
8269 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8272 iallocator=self.op.iallocator,
8273 target_node=self.op.target_node,
8274 allow_runtime_changes=allow_runtime_changes,
8275 ignore_ipolicy=self.op.ignore_ipolicy)]
8276 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
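# Each entry in "jobs" is a single-opcode job, one per primary instance
# on the node, so the per-instance migrations run as independent jobs
# whose IDs are handed back to the caller via ResultWithJobs below.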
8278 # TODO: Run iallocator in this opcode and pass correct placement options to
8279 # OpInstanceMigrate. Since other jobs can modify the cluster between
8280 # running the iallocator and the actual migration, a good consistency model
8281 # will have to be found.
8283 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8284 frozenset([self.op.node_name]))
8286 return ResultWithJobs(jobs)
8289 class TLMigrateInstance(Tasklet):
8290 """Tasklet class for instance migration.
8293 @ivar live: whether the migration will be done live or non-live;
8294 this variable is initialized only after CheckPrereq has run
8295 @type cleanup: boolean
8296 @ivar cleanup: Whether we are cleaning up from a failed migration
8297 @type iallocator: string
8298 @ivar iallocator: The iallocator used to determine target_node
8299 @type target_node: string
8300 @ivar target_node: If given, the target_node to reallocate the instance to
8301 @type failover: boolean
8302 @ivar failover: Whether operation results in failover or migration
8303 @type fallback: boolean
8304 @ivar fallback: Whether fallback to failover is allowed if migration not
8306 @type ignore_consistency: boolean
8307 @ivar ignore_consistency: Whether we should ignore consistency between source
8309 @type shutdown_timeout: int
8310 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
8311 @type ignore_ipolicy: bool
8312 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8317 _MIGRATION_POLL_INTERVAL = 1 # seconds
8318 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8320 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8321 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8323 """Initializes this class.
8326 Tasklet.__init__(self, lu)
8329 self.instance_name = instance_name
8330 self.cleanup = cleanup
8331 self.live = False # will be overridden later
8332 self.failover = failover
8333 self.fallback = fallback
8334 self.ignore_consistency = ignore_consistency
8335 self.shutdown_timeout = shutdown_timeout
8336 self.ignore_ipolicy = ignore_ipolicy
8337 self.allow_runtime_changes = allow_runtime_changes
8339 def CheckPrereq(self):
8340 """Check prerequisites.
8342 This checks that the instance is in the cluster.
8345 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8346 instance = self.cfg.GetInstanceInfo(instance_name)
8347 assert instance is not None
8348 self.instance = instance
8349 cluster = self.cfg.GetClusterInfo()
8351 if (not self.cleanup and
8352 not instance.admin_state == constants.ADMINST_UP and
8353 not self.failover and self.fallback):
8354 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8355 " switching to failover")
8356 self.failover = True
8358 if instance.disk_template not in constants.DTS_MIRRORED:
8363 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8364 " %s" % (instance.disk_template, text),
8367 if instance.disk_template in constants.DTS_EXT_MIRROR:
8368 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8370 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8372 if self.lu.op.iallocator:
8373 self._RunAllocator()
8375 # We set self.target_node as it is required by
8377 self.target_node = self.lu.op.target_node
8379 # Check that the target node is correct in terms of instance policy
8380 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8381 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8382 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8384 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8385 ignore=self.ignore_ipolicy)
8387 # self.target_node is already populated, either directly or by the
8389 target_node = self.target_node
8390 if self.target_node == instance.primary_node:
8391 raise errors.OpPrereqError("Cannot migrate instance %s"
8392 " to its primary (%s)" %
8393 (instance.name, instance.primary_node),
8396 if len(self.lu.tasklets) == 1:
8397 # It is safe to release locks only when we're the only tasklet
8399 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8400 keep=[instance.primary_node, self.target_node])
8401 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8404 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8406 secondary_nodes = instance.secondary_nodes
8407 if not secondary_nodes:
8408 raise errors.ConfigurationError("No secondary node but using"
8409 " %s disk template" %
8410 instance.disk_template)
8411 target_node = secondary_nodes[0]
8412 if self.lu.op.iallocator or (self.lu.op.target_node and
8413 self.lu.op.target_node != target_node):
8415 text = "failed over"
8418 raise errors.OpPrereqError("Instances with disk template %s cannot"
8419 " be %s to arbitrary nodes"
8420 " (neither an iallocator nor a target"
8421 " node can be passed)" %
8422 (instance.disk_template, text),
8424 nodeinfo = self.cfg.GetNodeInfo(target_node)
8425 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8426 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8428 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8429 ignore=self.ignore_ipolicy)
8431 i_be = cluster.FillBE(instance)
8433 # check memory requirements on the target node
8434 if (not self.cleanup and
8435 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8436 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8437 "migrating instance %s" %
8439 i_be[constants.BE_MINMEM],
8440 instance.hypervisor)
8442 self.lu.LogInfo("Not checking memory on the secondary node as"
8443 " instance will not be started")
8445 # check if failover must be forced instead of migration
8446 if (not self.cleanup and not self.failover and
8447 i_be[constants.BE_ALWAYS_FAILOVER]):
8448 self.lu.LogInfo("Instance configured to always failover; fallback"
8450 self.failover = True
8452 # check bridge existence
8453 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8455 if not self.cleanup:
8456 _CheckNodeNotDrained(self.lu, target_node)
8457 if not self.failover:
8458 result = self.rpc.call_instance_migratable(instance.primary_node,
8460 if result.fail_msg and self.fallback:
8461 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8463 self.failover = True
8465 result.Raise("Can't migrate, please use failover",
8466 prereq=True, ecode=errors.ECODE_STATE)
8468 assert not (self.failover and self.cleanup)
8470 if not self.failover:
8471 if self.lu.op.live is not None and self.lu.op.mode is not None:
8472 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8473 " parameters are accepted",
8475 if self.lu.op.live is not None:
8477 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8479 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8480 # reset the 'live' parameter to None so that repeated
8481 # invocations of CheckPrereq do not raise an exception
8482 self.lu.op.live = None
8483 elif self.lu.op.mode is None:
8484 # read the default value from the hypervisor
8485 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8486 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8488 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8490 # Failover is never live
8493 if not (self.failover or self.cleanup):
8494 remote_info = self.rpc.call_instance_info(instance.primary_node,
8496 instance.hypervisor)
8497 remote_info.Raise("Error checking instance on node %s" %
8498 instance.primary_node)
8499 instance_running = bool(remote_info.payload)
8500 if instance_running:
8501 self.current_mem = int(remote_info.payload["memory"])
8503 def _RunAllocator(self):
8504 """Run the allocator based on input opcode.
8507 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8509 # FIXME: add a self.ignore_ipolicy option
8510 req = iallocator.IAReqRelocate(name=self.instance_name,
8511 relocate_from=[self.instance.primary_node])
8512 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8514 ial.Run(self.lu.op.iallocator)
8517 raise errors.OpPrereqError("Can't compute nodes using"
8518 " iallocator '%s': %s" %
8519 (self.lu.op.iallocator, ial.info),
8521 self.target_node = ial.result[0]
8522 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8523 self.instance_name, self.lu.op.iallocator,
8524 utils.CommaJoin(ial.result))
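# (For an IAReqRelocate request, ial.result is a list of node names;
# with single-secondary templates it contains exactly the one node
# assigned to self.target_node above.)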
8526 def _WaitUntilSync(self):
8527 """Poll with custom rpc for disk sync.
8529 This uses our own step-based rpc call.
8532 self.feedback_fn("* wait until resync is done")
8536 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8538 (self.instance.disks,
8541 for node, nres in result.items():
8542 nres.Raise("Cannot resync disks on node %s" % node)
8543 node_done, node_percent = nres.payload
8544 all_done = all_done and node_done
8545 if node_percent is not None:
8546 min_percent = min(min_percent, node_percent)
8548 if min_percent < 100:
8549 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8552 def _EnsureSecondary(self, node):
8553 """Demote a node to secondary.
8556 self.feedback_fn("* switching node %s to secondary mode" % node)
8558 for dev in self.instance.disks:
8559 self.cfg.SetDiskID(dev, node)
8561 result = self.rpc.call_blockdev_close(node, self.instance.name,
8562 self.instance.disks)
8563 result.Raise("Cannot change disk to secondary on node %s" % node)
8565 def _GoStandalone(self):
8566 """Disconnect from the network.
8569 self.feedback_fn("* changing into standalone mode")
8570 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8571 self.instance.disks)
8572 for node, nres in result.items():
8573 nres.Raise("Cannot disconnect disks node %s" % node)
8575 def _GoReconnect(self, multimaster):
8576 """Reconnect to the network.
8582 msg = "single-master"
8583 self.feedback_fn("* changing disks into %s mode" % msg)
8584 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8585 (self.instance.disks, self.instance),
8586 self.instance.name, multimaster)
8587 for node, nres in result.items():
8588 nres.Raise("Cannot change disks config on node %s" % node)
8590 def _ExecCleanup(self):
8591 """Try to cleanup after a failed migration.
8593 The cleanup is done by:
8594 - check that the instance is running only on one node
8595 (and update the config if needed)
8596 - change disks on its secondary node to secondary
8597 - wait until disks are fully synchronized
8598 - disconnect from the network
8599 - change disks into single-master mode
8600 - wait again until disks are fully synchronized
8603 instance = self.instance
8604 target_node = self.target_node
8605 source_node = self.source_node
8607 # check running on only one node
8608 self.feedback_fn("* checking where the instance actually runs"
8609 " (if this hangs, the hypervisor might be in"
8611 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8612 for node, result in ins_l.items():
8613 result.Raise("Can't contact node %s" % node)
8615 runningon_source = instance.name in ins_l[source_node].payload
8616 runningon_target = instance.name in ins_l[target_node].payload
8618 if runningon_source and runningon_target:
8619 raise errors.OpExecError("Instance seems to be running on two nodes,"
8620 " or the hypervisor is confused; you will have"
8621 " to ensure manually that it runs only on one"
8622 " and restart this operation")
8624 if not (runningon_source or runningon_target):
8625 raise errors.OpExecError("Instance does not seem to be running at all;"
8626 " in this case it's safer to repair by"
8627 " running 'gnt-instance stop' to ensure disk"
8628 " shutdown, and then restarting it")
8630 if runningon_target:
8631 # the migration has actually succeeded, we need to update the config
8632 self.feedback_fn("* instance running on secondary node (%s),"
8633 " updating config" % target_node)
8634 instance.primary_node = target_node
8635 self.cfg.Update(instance, self.feedback_fn)
8636 demoted_node = source_node
8638 self.feedback_fn("* instance confirmed to be running on its"
8639 " primary node (%s)" % source_node)
8640 demoted_node = target_node
8642 if instance.disk_template in constants.DTS_INT_MIRROR:
8643 self._EnsureSecondary(demoted_node)
8645 self._WaitUntilSync()
8646 except errors.OpExecError:
8647 # we ignore errors here, since if the device is standalone, it
8648 # won't be able to sync
8650 self._GoStandalone()
8651 self._GoReconnect(False)
8652 self._WaitUntilSync()
8654 self.feedback_fn("* done")
8656 def _RevertDiskStatus(self):
8657 """Try to revert the disk status after a failed migration.
8660 target_node = self.target_node
8661 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8665 self._EnsureSecondary(target_node)
8666 self._GoStandalone()
8667 self._GoReconnect(False)
8668 self._WaitUntilSync()
8669 except errors.OpExecError, err:
8670 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8671 " please try to recover the instance manually;"
8672 " error '%s'" % str(err))
8674 def _AbortMigration(self):
8675 """Call the hypervisor code to abort a started migration.
8678 instance = self.instance
8679 target_node = self.target_node
8680 source_node = self.source_node
8681 migration_info = self.migration_info
8683 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8687 abort_msg = abort_result.fail_msg
8689 logging.error("Aborting migration failed on target node %s: %s",
8690 target_node, abort_msg)
8691 # Don't raise an exception here, as we still have to try to revert the
8692 # disk status, even if this step failed.
8694 abort_result = self.rpc.call_instance_finalize_migration_src(
8695 source_node, instance, False, self.live)
8696 abort_msg = abort_result.fail_msg
8698 logging.error("Aborting migration failed on source node %s: %s",
8699 source_node, abort_msg)
8701 def _ExecMigration(self):
8702 """Migrate an instance.
8704 The migration is done by:
8705 - change the disks into dual-master mode
8706 - wait until disks are fully synchronized again
8707 - migrate the instance
8708 - change disks on the new secondary node (the old primary) to secondary
8709 - wait until disks are fully synchronized
8710 - change disks into single-master mode
8713 instance = self.instance
8714 target_node = self.target_node
8715 source_node = self.source_node
8717 # Check for hypervisor version mismatch and warn the user.
8718 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8719 None, [self.instance.hypervisor])
8720 for ninfo in nodeinfo.values():
8721 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8723 (_, _, (src_info, )) = nodeinfo[source_node].payload
8724 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8726 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8727 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8728 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8729 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8730 if src_version != dst_version:
8731 self.feedback_fn("* warning: hypervisor version mismatch between"
8732 " source (%s) and target (%s) node" %
8733 (src_version, dst_version))
8735 self.feedback_fn("* checking disk consistency between source and target")
8736 for (idx, dev) in enumerate(instance.disks):
8737 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8738 raise errors.OpExecError("Disk %s is degraded or not fully"
8739 " synchronized on target node,"
8740 " aborting migration" % idx)
8742 if self.current_mem > self.tgt_free_mem:
8743 if not self.allow_runtime_changes:
8744 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8745 " free memory to fit instance %s on target"
8746 " node %s (have %dMB, need %dMB)" %
8747 (instance.name, target_node,
8748 self.tgt_free_mem, self.current_mem))
8749 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8750 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8753 rpcres.Raise("Cannot modify instance runtime memory")
8755 # First get the migration information from the remote node
8756 result = self.rpc.call_migration_info(source_node, instance)
8757 msg = result.fail_msg
8759 log_err = ("Failed fetching source migration information from %s: %s" %
8761 logging.error(log_err)
8762 raise errors.OpExecError(log_err)
8764 self.migration_info = migration_info = result.payload
8766 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8767 # Then switch the disks to master/master mode
8768 self._EnsureSecondary(target_node)
8769 self._GoStandalone()
8770 self._GoReconnect(True)
8771 self._WaitUntilSync()
8773 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8774 result = self.rpc.call_accept_instance(target_node,
8777 self.nodes_ip[target_node])
8779 msg = result.fail_msg
8781 logging.error("Instance pre-migration failed, trying to revert"
8782 " disk status: %s", msg)
8783 self.feedback_fn("Pre-migration failed, aborting")
8784 self._AbortMigration()
8785 self._RevertDiskStatus()
8786 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8787 (instance.name, msg))
8789 self.feedback_fn("* migrating instance to %s" % target_node)
8790 result = self.rpc.call_instance_migrate(source_node, instance,
8791 self.nodes_ip[target_node],
8793 msg = result.fail_msg
8795 logging.error("Instance migration failed, trying to revert"
8796 " disk status: %s", msg)
8797 self.feedback_fn("Migration failed, aborting")
8798 self._AbortMigration()
8799 self._RevertDiskStatus()
8800 raise errors.OpExecError("Could not migrate instance %s: %s" %
8801 (instance.name, msg))
8803 self.feedback_fn("* starting memory transfer")
8804 last_feedback = time.time()
8806 result = self.rpc.call_instance_get_migration_status(source_node,
8808 msg = result.fail_msg
8809 ms = result.payload # MigrationStatus instance
8810 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8811 logging.error("Instance migration failed, trying to revert"
8812 " disk status: %s", msg)
8813 self.feedback_fn("Migration failed, aborting")
8814 self._AbortMigration()
8815 self._RevertDiskStatus()
8817 msg = "hypervisor returned failure"
8818 raise errors.OpExecError("Could not migrate instance %s: %s" %
8819 (instance.name, msg))
8821 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8822 self.feedback_fn("* memory transfer complete")
8825 if (utils.TimeoutExpired(last_feedback,
8826 self._MIGRATION_FEEDBACK_INTERVAL) and
8827 ms.transferred_ram is not None):
8828 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8829 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8830 last_feedback = time.time()
8832 time.sleep(self._MIGRATION_POLL_INTERVAL)
8834 result = self.rpc.call_instance_finalize_migration_src(source_node,
8838 msg = result.fail_msg
8840 logging.error("Instance migration succeeded, but finalization failed"
8841 " on the source node: %s", msg)
8842 raise errors.OpExecError("Could not finalize instance migration: %s" %
8845 instance.primary_node = target_node
8847 # distribute new instance config to the other nodes
8848 self.cfg.Update(instance, self.feedback_fn)
8850 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8854 msg = result.fail_msg
8856 logging.error("Instance migration succeeded, but finalization failed"
8857 " on the target node: %s", msg)
8858 raise errors.OpExecError("Could not finalize instance migration: %s" %
8861 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8862 self._EnsureSecondary(source_node)
8863 self._WaitUntilSync()
8864 self._GoStandalone()
8865 self._GoReconnect(False)
8866 self._WaitUntilSync()
8868 # If the instance's disk template is `rbd' and there was a successful
8869 # migration, unmap the device from the source node.
8870 if self.instance.disk_template == constants.DT_RBD:
8871 disks = _ExpandCheckDisks(instance, instance.disks)
8872 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8874 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8875 msg = result.fail_msg
8877 logging.error("Migration was successful, but couldn't unmap the"
8878 " block device %s on source node %s: %s",
8879 disk.iv_name, source_node, msg)
8880 logging.error("You need to unmap the device %s manually on %s",
8881 disk.iv_name, source_node)
8883 self.feedback_fn("* done")
8885 def _ExecFailover(self):
8886 """Failover an instance.
8888 The failover is done by shutting it down on its present node and
8889 starting it on the secondary.
8892 instance = self.instance
8893 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8895 source_node = instance.primary_node
8896 target_node = self.target_node
8898 if instance.admin_state == constants.ADMINST_UP:
8899 self.feedback_fn("* checking disk consistency between source and target")
8900 for (idx, dev) in enumerate(instance.disks):
8901 # for drbd, these are drbd over lvm
8902 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8904 if primary_node.offline:
8905 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8907 (primary_node.name, idx, target_node))
8908 elif not self.ignore_consistency:
8909 raise errors.OpExecError("Disk %s is degraded on target node,"
8910 " aborting failover" % idx)
8912 self.feedback_fn("* not checking disk consistency as instance is not"
8915 self.feedback_fn("* shutting down instance on source node")
8916 logging.info("Shutting down instance %s on node %s",
8917 instance.name, source_node)
8919 result = self.rpc.call_instance_shutdown(source_node, instance,
8920 self.shutdown_timeout)
8921 msg = result.fail_msg
8923 if self.ignore_consistency or primary_node.offline:
8924 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8925 " proceeding anyway; please make sure node"
8926 " %s is down; error details: %s",
8927 instance.name, source_node, source_node, msg)
8929 raise errors.OpExecError("Could not shutdown instance %s on"
8931 (instance.name, source_node, msg))
8933 self.feedback_fn("* deactivating the instance's disks on source node")
8934 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8935 raise errors.OpExecError("Can't shut down the instance's disks")
8937 instance.primary_node = target_node
8938 # distribute new instance config to the other nodes
8939 self.cfg.Update(instance, self.feedback_fn)
8941 # Only start the instance if it's marked as up
8942 if instance.admin_state == constants.ADMINST_UP:
8943 self.feedback_fn("* activating the instance's disks on target node %s" %
8945 logging.info("Starting instance %s on node %s",
8946 instance.name, target_node)
8948 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8949 ignore_secondaries=True)
8951 _ShutdownInstanceDisks(self.lu, instance)
8952 raise errors.OpExecError("Can't activate the instance's disks")
8954 self.feedback_fn("* starting the instance on the target node %s" %
8956 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8958 msg = result.fail_msg
8960 _ShutdownInstanceDisks(self.lu, instance)
8961 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8962 (instance.name, target_node, msg))
8964 def Exec(self, feedback_fn):
8965 """Perform the migration.
8968 self.feedback_fn = feedback_fn
8969 self.source_node = self.instance.primary_node
8971 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8972 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8973 self.target_node = self.instance.secondary_nodes[0]
8974 # Otherwise self.target_node has been populated either
8975 # directly, or through an iallocator.
8977 self.all_nodes = [self.source_node, self.target_node]
8978 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8979 in self.cfg.GetMultiNodeInfo(self.all_nodes))
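# nodes_ip maps each node name to its secondary (replication network)
# IP address, which is what the DRBD disconnect/attach RPCs used by
# this tasklet operate on.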
8982 feedback_fn("Failover instance %s" % self.instance.name)
8983 self._ExecFailover()
8985 feedback_fn("Migrating instance %s" % self.instance.name)
8988 return self._ExecCleanup()
8990 return self._ExecMigration()
8993 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8995 """Wrapper around L{_CreateBlockDevInner}.
8997 This method annotates the root device first.
9000 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9001 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9005 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9007 """Create a tree of block devices on a given node.
9009 If this device type has to be created on secondaries, create it and all its children.
9012 If not, just recurse to children keeping the same 'force' value.
9014 @attention: The device has to be annotated already.
9016 @param lu: the lu on whose behalf we execute
9017 @param node: the node on which to create the device
9018 @type instance: L{objects.Instance}
9019 @param instance: the instance which owns the device
9020 @type device: L{objects.Disk}
9021 @param device: the device to create
9022 @type force_create: boolean
9023 @param force_create: whether to force creation of this device; this
9024 will be changed to True whenever we find a device which has the
9025 CreateOnSecondary() attribute
9026 @param info: the extra 'metadata' we should attach to the device
9027 (this will be represented as a LVM tag)
9028 @type force_open: boolean
9029 @param force_open: this parameter will be passed to the
9030 L{backend.BlockdevCreate} function where it specifies
9031 whether we run on primary or not, and it affects both
9032 the child assembly and the device's own Open() execution
9035 if device.CreateOnSecondary():
9039 for child in device.children:
9040 _CreateBlockDevInner(lu, node, instance, child, force_create,
9043 if not force_create:
9046 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
9049 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
9050 """Create a single block device on a given node.
9052 This will not recurse over children of the device, so they must be
9055 @param lu: the lu on whose behalf we execute
9056 @param node: the node on which to create the device
9057 @type instance: L{objects.Instance}
9058 @param instance: the instance which owns the device
9059 @type device: L{objects.Disk}
9060 @param device: the device to create
9061 @param info: the extra 'metadata' we should attach to the device
9062 (this will be represented as a LVM tag)
9063 @type force_open: boolean
9064 @param force_open: this parameter will be passed to the
9065 L{backend.BlockdevCreate} function where it specifies
9066 whether we run on primary or not, and it affects both
9067 the child assembly and the device's own Open() execution
9070 lu.cfg.SetDiskID(device, node)
9071 result = lu.rpc.call_blockdev_create(node, device, device.size,
9072 instance.name, force_open, info)
9073 result.Raise("Can't create block device %s on"
9074 " node %s for instance %s" % (device, node, instance.name))
9075 if device.physical_id is None:
9076 device.physical_id = result.payload
9079 def _GenerateUniqueNames(lu, exts):
9080 """Generate a suitable LV name.
9082 This will generate a unique logical volume name for each requested extension.
9087 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9088 results.append("%s%s" % (new_id, val))
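# For example, with exts == [".disk0", ".disk1"] this yields names such
# as "<uuid>.disk0" and "<uuid>.disk1", one freshly generated unique ID
# per extension.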
9092 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9093 iv_name, p_minor, s_minor):
9094 """Generate a drbd8 device complete with its children.
9097 assert len(vgnames) == len(names) == 2
9098 port = lu.cfg.AllocatePort()
9099 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9101 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9102 logical_id=(vgnames[0], names[0]),
9104 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9105 size=constants.DRBD_META_SIZE,
9106 logical_id=(vgnames[1], names[1]),
9108 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9109 logical_id=(primary, secondary, port,
9112 children=[dev_data, dev_meta],
9113 iv_name=iv_name, params={})
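# The result is a small device tree: an LD_DRBD8 device whose
# logical_id carries (primary, secondary, port, p_minor, s_minor,
# shared_secret), backed by two LD_LV children for data and metadata.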
9117 _DISK_TEMPLATE_NAME_PREFIX = {
9118 constants.DT_PLAIN: "",
9119 constants.DT_RBD: ".rbd",
9123 _DISK_TEMPLATE_DEVICE_TYPE = {
9124 constants.DT_PLAIN: constants.LD_LV,
9125 constants.DT_FILE: constants.LD_FILE,
9126 constants.DT_SHARED_FILE: constants.LD_FILE,
9127 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9128 constants.DT_RBD: constants.LD_RBD,
9132 def _GenerateDiskTemplate(
9133 lu, template_name, instance_name, primary_node, secondary_nodes,
9134 disk_info, file_storage_dir, file_driver, base_index,
9135 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9136 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9137 """Generate the entire disk layout for a given template type.
9140 vgname = lu.cfg.GetVGName()
9141 disk_count = len(disk_info)
9144 if template_name == constants.DT_DISKLESS:
9146 elif template_name == constants.DT_DRBD8:
9147 if len(secondary_nodes) != 1:
9148 raise errors.ProgrammerError("Wrong template configuration")
9149 remote_node = secondary_nodes[0]
9150 minors = lu.cfg.AllocateDRBDMinor(
9151 [primary_node, remote_node] * len(disk_info), instance_name)
9153 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9155 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9158 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9159 for i in range(disk_count)]):
9160 names.append(lv_prefix + "_data")
9161 names.append(lv_prefix + "_meta")
9162 for idx, disk in enumerate(disk_info):
9163 disk_index = idx + base_index
9164 data_vg = disk.get(constants.IDISK_VG, vgname)
9165 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9166 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9167 disk[constants.IDISK_SIZE],
9169 names[idx * 2:idx * 2 + 2],
9170 "disk/%d" % disk_index,
9171 minors[idx * 2], minors[idx * 2 + 1])
9172 disk_dev.mode = disk[constants.IDISK_MODE]
9173 disks.append(disk_dev)
9176 raise errors.ProgrammerError("Wrong template configuration")
9178 if template_name == constants.DT_FILE:
9180 elif template_name == constants.DT_SHARED_FILE:
9181 _req_shr_file_storage()
9183 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9184 if name_prefix is None:
9187 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9188 (name_prefix, base_index + i)
9189 for i in range(disk_count)])
9191 if template_name == constants.DT_PLAIN:
9193 def logical_id_fn(idx, _, disk):
9194 vg = disk.get(constants.IDISK_VG, vgname)
9195 return (vg, names[idx])
9197 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9199 lambda _, disk_index, disk: (file_driver,
9200 "%s/disk%d" % (file_storage_dir,
9202 elif template_name == constants.DT_BLOCK:
9204 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9205 disk[constants.IDISK_ADOPT])
9206 elif template_name == constants.DT_RBD:
9207 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9209 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
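# In summary, logical_id_fn yields (vg_name, lv_name) for plain LVs,
# (file_driver, path) for file-based disks,
# (BLOCKDEV_DRIVER_MANUAL, device_path) for adopted block devices and
# ("rbd", volume_name) for RBD volumes.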
9211 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9213 for idx, disk in enumerate(disk_info):
9214 disk_index = idx + base_index
9215 size = disk[constants.IDISK_SIZE]
9216 feedback_fn("* disk %s, size %s" %
9217 (disk_index, utils.FormatUnit(size, "h")))
9218 disks.append(objects.Disk(dev_type=dev_type, size=size,
9219 logical_id=logical_id_fn(idx, disk_index, disk),
9220 iv_name="disk/%d" % disk_index,
9221 mode=disk[constants.IDISK_MODE],
9227 def _GetInstanceInfoText(instance):
9228 """Compute that text that should be added to the disk's metadata.
9231 return "originstname+%s" % instance.name
9234 def _CalcEta(time_taken, written, total_size):
9235 """Calculates the ETA based on size written and total size.
9237 @param time_taken: The time taken so far
9238 @param written: amount written so far
9239 @param total_size: The total size of data to be written
9240 @return: The remaining time in seconds
9243 avg_time = time_taken / float(written)
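# For example, if writing the first 200 MiB of a 1000 MiB disk took 100
# seconds, avg_time is 0.5 s/MiB and the remaining 800 MiB give an ETA
# of _CalcEta(100, 200, 1000) == (1000 - 200) * (100 / 200.0) == 400.0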
9244 return (total_size - written) * avg_time
9247 def _WipeDisks(lu, instance, disks=None):
9248 """Wipes instance disks.
9250 @type lu: L{LogicalUnit}
9251 @param lu: the logical unit on whose behalf we execute
9252 @type instance: L{objects.Instance}
9253 @param instance: the instance whose disks we should wipe
9254 @return: the success of the wipe
9257 node = instance.primary_node
9260 disks = [(idx, disk, 0)
9261 for (idx, disk) in enumerate(instance.disks)]
9263 for (_, device, _) in disks:
9264 lu.cfg.SetDiskID(device, node)
9266 logging.info("Pausing synchronization of disks of instance '%s'",
9268 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9269 (map(compat.snd, disks),
9272 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9274 for idx, success in enumerate(result.payload):
9276 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9277 " failed", idx, instance.name)
9280 for (idx, device, offset) in disks:
9281 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9282 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9284 int(min(constants.MAX_WIPE_CHUNK,
9285 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
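# With the usual defaults (MIN_WIPE_CHUNK_PERCENT == 10 and
# MAX_WIPE_CHUNK == 1024 MiB), a 20480 MiB disk would give 2048 MiB,
# capped to a 1024 MiB chunk; a 5120 MiB disk would use 512 MiB chunks.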
9289 start_time = time.time()
9294 info_text = (" (from %s to %s)" %
9295 (utils.FormatUnit(offset, "h"),
9296 utils.FormatUnit(size, "h")))
9298 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9300 logging.info("Wiping disk %d for instance %s on node %s using"
9301 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9303 while offset < size:
9304 wipe_size = min(wipe_chunk_size, size - offset)
9306 logging.debug("Wiping disk %d, offset %s, chunk %s",
9307 idx, offset, wipe_size)
9309 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9311 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9312 (idx, offset, wipe_size))
9316 if now - last_output >= 60:
9317 eta = _CalcEta(now - start_time, offset, size)
9318 lu.LogInfo(" - done: %.1f%% ETA: %s",
9319 offset / float(size) * 100, utils.FormatSeconds(eta))
9322 logging.info("Resuming synchronization of disks for instance '%s'",
9325 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9326 (map(compat.snd, disks),
9331 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9332 node, result.fail_msg)
9334 for idx, success in enumerate(result.payload):
9336 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9337 " failed", idx, instance.name)
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)

  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  ports_to_release = set()
  anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
  for (idx, device) in enumerate(anno_disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      result = lu.rpc.call_blockdev_remove(node, disk)
      if result.fail_msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, result.fail_msg)
        if not (result.offline and node != instance.primary_node):
          all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    # accumulate the required size per volume group, keyed by VG name
    vgs = {}
    for disk in disks:
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
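
# Illustrative sketch (not part of the original module): two disks in the
# same (made-up) volume group accumulate into one per-VG requirement; with
# DT_DRBD8 every disk additionally contributes DRBD_META_SIZE for metadata.
def _ExampleDiskSizePerVG():
  disks = [{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
           {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 2048}]
  return _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)
  # -> {"xenvg": 1024 + 2048 + 2 * constants.DRBD_META_SIZE}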
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in vm_nodes]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
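
# Illustrative sketch (not part of the original module): a typical call from
# an LU's CheckPrereq, validating the final hypervisor parameters on the
# instance's nodes before committing a change; node names are made up.
#
#   _CheckHVParams(self, ["node1.example.com", "node2.example.com"],
#                  self.op.hypervisor, self.op.hvparams)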
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
  """Wrapper around IAReqInstanceAlloc.

  @param op: The instance opcode
  @param disks: The computed disks
  @param nics: The computed nics
  @param beparams: The fully filled beparams
  @param node_whitelist: List of nodes which should appear as online to the
    allocator (unless the node is already marked offline)

  @returns: A filled L{iallocator.IAReqInstanceAlloc}

  """
  spindle_use = beparams[constants.BE_SPINDLE_USE]
  return iallocator.IAReqInstanceAlloc(name=op.instance_name,
                                       disk_template=op.disk_template,
                                       tags=op.tags,
                                       os=op.os_type,
                                       vcpus=beparams[constants.BE_VCPUS],
                                       memory=beparams[constants.BE_MAXMEM],
                                       spindle_use=spindle_use,
                                       disks=disks,
                                       nics=[n.ToDict() for n in nics],
                                       hypervisor=op.hypervisor,
                                       node_whitelist=node_whitelist)
def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
  """Computes the nics.

  @param op: The instance opcode
  @param cluster: Cluster configuration object
  @param default_ip: The default ip to assign
  @param cfg: An instance of the configuration object
  @param ec_id: Execution context ID

  @returns: The built up nics

  """
  nics = []
  for nic in op.nics:
    nic_mode_req = nic.get(constants.INIC_MODE, None)
    nic_mode = nic_mode_req
    if nic_mode is None or nic_mode == constants.VALUE_AUTO:
      nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

    net = nic.get(constants.INIC_NETWORK, None)
    link = nic.get(constants.NIC_LINK, None)
    ip = nic.get(constants.INIC_IP, None)

    if net is None or net.lower() == constants.VALUE_NONE:
      net = None
    else:
      if nic_mode_req is not None or link is not None:
        raise errors.OpPrereqError("If network is given, no mode or link"
                                   " is allowed to be passed",
                                   errors.ECODE_INVAL)

    # ip validity checks
    if ip is None or ip.lower() == constants.VALUE_NONE:
      nic_ip = None
    elif ip.lower() == constants.VALUE_AUTO:
      if not op.name_check:
        raise errors.OpPrereqError("IP address set to auto but name checks"
                                   " have been skipped",
                                   errors.ECODE_INVAL)
      nic_ip = default_ip
    else:
      # We defer pool operations until later, so that the iallocator has
      # filled in the instance's node(s)
      if ip.lower() == constants.NIC_IP_POOL:
        if net is None:
          raise errors.OpPrereqError("if ip=pool, parameter network"
                                     " must be passed too",
                                     errors.ECODE_INVAL)

      elif not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                   errors.ECODE_INVAL)

      nic_ip = ip

    # TODO: check the ip address for uniqueness
    if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
      raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                 errors.ECODE_INVAL)

    # MAC address verification
    mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
    if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      mac = utils.NormalizeAndValidateMac(mac)

      try:
        # TODO: We need to factor this out
        cfg.ReserveMAC(mac, ec_id)
      except errors.ReservationError:
        raise errors.OpPrereqError("MAC address %s already in use"
                                   " in cluster" % mac,
                                   errors.ECODE_NOTUNIQUE)

    # Build nic parameters
    nicparams = {}
    if nic_mode_req:
      nicparams[constants.NIC_MODE] = nic_mode
    if link:
      nicparams[constants.NIC_LINK] = link

    check_params = cluster.SimpleFillNIC(nicparams)
    objects.NIC.CheckParameterSyntax(check_params)
    nics.append(objects.NIC(mac=mac, ip=nic_ip,
                            network=net, nicparams=nicparams))

  return nics
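
# Illustrative sketch (not part of the original module): a hypothetical
# opcode NIC entry {"mode": "bridged", "link": "br0", "ip": "none"} passes
# through the checks above with nic_mode "bridged", nic_ip None and an
# auto-generated MAC, while {"mode": ..., "link": ...} is recorded as the
# per-NIC parameter overrides in nicparams.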
def _ComputeDisks(op, default_vg):
  """Computes the instance disks.

  @param op: The instance opcode
  @param default_vg: The default_vg to assume

  @return: The computed disks

  """
  disks = []
  for disk in op.disks:
    mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
    if mode not in constants.DISK_ACCESS_SET:
      raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                 mode, errors.ECODE_INVAL)
    size = disk.get(constants.IDISK_SIZE, None)
    if size is None:
      raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
    try:
      size = int(size)
    except (TypeError, ValueError):
      raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                 errors.ECODE_INVAL)

    data_vg = disk.get(constants.IDISK_VG, default_vg)
    new_disk = {
      constants.IDISK_SIZE: size,
      constants.IDISK_MODE: mode,
      constants.IDISK_VG: data_vg,
      }
    if constants.IDISK_METAVG in disk:
      new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
    if constants.IDISK_ADOPT in disk:
      new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
    disks.append(new_disk)

  return disks
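
# Illustrative sketch (not part of the original module): feeding
# _ComputeDisks a minimal opcode-like object; sizes are in MiB and the
# volume group name is made up.
def _ExampleComputeDisks():
  class _FakeOp(object):  # stands in for an OpInstanceCreate opcode
    disks = [{constants.IDISK_SIZE: "1024"},
             {constants.IDISK_SIZE: 2048,
              constants.IDISK_MODE: constants.DISK_RDONLY}]

  return _ComputeDisks(_FakeOp(), "xenvg")
  # -> [{size: 1024, mode: "rw", vg: "xenvg"},
  #     {size: 2048, mode: "ro", vg: "xenvg"}]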
def _ComputeFullBeParams(op, cluster):
  """Computes the full beparams.

  @param op: The instance opcode
  @param cluster: The cluster config object

  @return: The fully filled beparams

  """
  default_beparams = cluster.beparams[constants.PP_DEFAULT]
  for param, value in op.beparams.iteritems():
    if value == constants.VALUE_AUTO:
      op.beparams[param] = default_beparams[param]
  objects.UpgradeBeParams(op.beparams)
  utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
  return cluster.SimpleFillBE(op.beparams)
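
# Illustrative sketch (not part of the original module): if the cluster
# default beparams contain {"vcpus": 4} and the opcode carries
# {"vcpus": constants.VALUE_AUTO}, the loop above rewrites the opcode value
# to 4 before the dict is type-checked and filled into the complete beparams.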
9723 class LUInstanceCreate(LogicalUnit):
9724 """Create an instance.
9727 HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
9737 if self.op.no_install and self.op.start:
9738 self.LogInfo("No-installation mode selected, disabling startup")
9739 self.op.start = False
9740 # validate/normalize the instance name
9741 self.op.instance_name = \
9742 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9744 if self.op.ip_check and not self.op.name_check:
9745 # TODO: make the ip check more flexible and not depend on the name check
9746 raise errors.OpPrereqError("Cannot do IP address check without a name"
9747 " check", errors.ECODE_INVAL)
9749 # check nics' parameter names
9750 for nic in self.op.nics:
9751 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9753 # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt
9785 # instance name verification
9786 if self.op.name_check:
9787 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9788 self.op.instance_name = self.hostname1.name
9789 # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None
9794 # file storage checks
9795 if (self.op.file_driver and
9796 not self.op.file_driver in constants.FILE_DRIVER):
9797 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9798 self.op.file_driver, errors.ECODE_INVAL)
9800 if self.op.disk_template == constants.DT_FILE:
9801 opcodes.RequireFileStorage()
9802 elif self.op.disk_template == constants.DT_SHARED_FILE:
9803 opcodes.RequireSharedFileStorage()
9805 ### Node/iallocator related checks
9806 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9808 if self.op.pnode is not None:
9809 if self.op.disk_template in constants.DTS_INT_MIRROR:
9810 if self.op.snode is None:
9811 raise errors.OpPrereqError("The networked disk templates need"
9812 " a mirror node", errors.ECODE_INVAL)
9814 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9816 self.op.snode = None
9818 self._cds = _GetClusterDomainSecret()
9820 if self.op.mode == constants.INSTANCE_IMPORT:
9821 # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True
9826 if self.op.no_install:
9827 self.LogInfo("No-installation mode has no effect during import")
    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_INVAL)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
9886 def ExpandNames(self):
9887 """ExpandNames for CreateInstance.
    Figure out the right locks for instance creation.

    """
9892 self.needed_locks = {}
9894 instance_name = self.op.instance_name
9895 # this is just a preventive check, but someone might still add this
9896 # instance in the meantime, and creation will fail at lock-add time
9897 if instance_name in self.cfg.GetInstanceList():
9898 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9899 instance_name, errors.ECODE_EXISTS)
9901 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9903 if self.op.iallocator:
9904 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9908 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
9910 if self.op.opportunistic_locking:
9911 self.opportunistic_locks[locking.LEVEL_NODE] = True
        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9915 nodelist = [self.op.pnode]
9916 if self.op.snode is not None:
9917 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9918 nodelist.append(self.op.snode)
9919 self.needed_locks[locking.LEVEL_NODE] = nodelist
9921 # in case of import lock the source node too
9922 if self.op.mode == constants.INSTANCE_IMPORT:
9923 src_node = self.op.src_node
9924 src_path = self.op.src_path
9926 if src_path is None:
9927 self.op.src_path = src_path = self.op.instance_name
9929 if src_node is None:
9930 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9931 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
9932 self.op.src_node = None
9933 if os.path.isabs(src_path):
9934 raise errors.OpPrereqError("Importing an instance from a path"
9935 " requires a source node option",
9938 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9939 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9940 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9941 if not os.path.isabs(src_path):
9942 self.op.src_path = src_path = \
9943 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9945 self.needed_locks[locking.LEVEL_NODE_RES] = \
9946 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
9948 def _RunAllocator(self):
9949 """Run the allocator based on input opcode.
9952 if self.op.opportunistic_locking:
9953 # Only consider nodes for which a lock is held
      node_whitelist = self.owned_locks(locking.LEVEL_NODE)
    else:
      node_whitelist = None
9958 #TODO Export network to iallocator so that it chooses a pnode
9959 # in a nodegroup that has the desired network connected to
9960 req = _CreateInstanceAllocRequest(self.op, self.disks,
9961 self.nics, self.be_full,
9963 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
    ial.Run(self.op.iallocator)

    if not ial.success:
      # When opportunistic locks are used only a temporary failure is generated
      if self.op.opportunistic_locking:
        ecode = errors.ECODE_TEMP_NORES
      else:
        ecode = errors.ECODE_NORES

      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 ecode)

    self.op.pnode = ial.result[0]
9980 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9981 self.op.instance_name, self.op.iallocator,
9982 utils.CommaJoin(ial.result))
9984 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9986 if req.RequiredNodes() == 2:
9987 self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
9998 if self.op.mode == constants.INSTANCE_IMPORT:
9999 env["SRC_NODE"] = self.op.src_node
10000 env["SRC_PATH"] = self.op.src_path
10001 env["SRC_IMAGES"] = self.src_images
10003 env.update(_BuildInstanceHookEnv(
10004 name=self.op.instance_name,
10005 primary_node=self.op.pnode,
10006 secondary_nodes=self.secondaries,
10007 status=self.op.start,
10008 os_type=self.op.os_type,
10009 minmem=self.be_full[constants.BE_MINMEM],
10010 maxmem=self.be_full[constants.BE_MAXMEM],
10011 vcpus=self.be_full[constants.BE_VCPUS],
10012 nics=_NICListToTuple(self, self.nics),
10013 disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
      ))

    return env
10024 def BuildHooksNodes(self):
10025 """Build hooks nodes.
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return (nl, nl)
10031 def _ReadExportInfo(self):
10032 """Reads the export information from disk.
10034 It will override the opcode source node and path with the actual
10035 information, if these two were not specified before.
    @return: the export information

    """
10040 assert self.op.mode == constants.INSTANCE_IMPORT
10042 src_node = self.op.src_node
10043 src_path = self.op.src_path
    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)
10062 _CheckNodeOnline(self, src_node)
10063 result = self.rpc.call_export_info(src_node, src_path)
10064 result.Raise("No export or invalid export found in dir %s" % src_path)
10066 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10067 if not export_info.has_section(constants.INISECT_EXP):
10068 raise errors.ProgrammerError("Corrupted export config",
10069 errors.ECODE_ENVIRON)
10071 ei_version = export_info.get(constants.INISECT_EXP, "version")
10072 if (int(ei_version) != constants.EXPORT_VERSION):
10073 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10074 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)

    return export_info
10078 def _ReadExportParams(self, einfo):
10079 """Use export parameters as defaults.
10081 In case the opcode doesn't specify (as in override) some instance
10082 parameters, then try to use them from the export information, if
    that declares them.

    """
10086 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" %
                                     " ".join(constants.DISK_TEMPLATES),
                                     errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)
    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)
    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics
10129 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10130 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10132 if (self.op.hypervisor is None and
10133 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10134 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10136 if einfo.has_section(constants.INISECT_HYP):
10137 # use the export parameters but do not override the ones
10138 # specified by the user
10139 for name, value in einfo.items(constants.INISECT_HYP):
10140 if name not in self.op.hvparams:
10141 self.op.hvparams[name] = value
10143 if einfo.has_section(constants.INISECT_BEP):
10144 # use the parameters, without overriding
10145 for name, value in einfo.items(constants.INISECT_BEP):
10146 if name not in self.op.beparams:
10147 self.op.beparams[name] = value
10148 # Compatibility for the old "memory" be param
10149 if name == constants.BE_MEMORY:
10150 if constants.BE_MAXMEM not in self.op.beparams:
10151 self.op.beparams[constants.BE_MAXMEM] = value
10152 if constants.BE_MINMEM not in self.op.beparams:
10153 self.op.beparams[constants.BE_MINMEM] = value
10155 # try to read the parameters old style, from the main section
10156 for name in constants.BES_PARAMETERS:
10157 if (name not in self.op.beparams and
10158 einfo.has_option(constants.INISECT_INS, name)):
10159 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10161 if einfo.has_section(constants.INISECT_OSP):
10162 # use the parameters, without overriding
10163 for name, value in einfo.items(constants.INISECT_OSP):
10164 if name not in self.op.osparams:
10165 self.op.osparams[name] = value
10167 def _RevertToDefaults(self, cluster):
10168 """Revert the instance parameters to the default values.
10172 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10173 for name in self.op.hvparams.keys():
10174 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10175 del self.op.hvparams[name]
10177 be_defs = cluster.SimpleFillBE({})
10178 for name in self.op.beparams.keys():
10179 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10180 del self.op.beparams[name]
10182 nic_defs = cluster.SimpleFillNIC({})
10183 for nic in self.op.nics:
10184 for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
10188 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10189 for name in self.op.osparams.keys():
10190 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10191 del self.op.osparams[name]
10193 def _CalculateFileStorageDir(self):
10194 """Calculate final instance file storage dir.
10197 # file storage dir calculation/check
10198 self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir
10208 cfg_storagedir = get_fsd_fn()
10209 if not cfg_storagedir:
10210 raise errors.OpPrereqError("Cluster file storage dir not defined",
10211 errors.ECODE_STATE)
10212 joinargs.append(cfg_storagedir)
10214 if self.op.file_storage_dir is not None:
10215 joinargs.append(self.op.file_storage_dir)
10217 joinargs.append(self.op.instance_name)
10219 # pylint: disable=W0142
10220 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10222 def CheckPrereq(self): # pylint: disable=R0914
10223 """Check prerequisites.
10226 self._CalculateFileStorageDir()
10228 if self.op.mode == constants.INSTANCE_IMPORT:
10229 export_info = self._ReadExportInfo()
10230 self._ReadExportParams(export_info)
      self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
    else:
      self._old_instance_name = None
10235 if (not self.cfg.GetVGName() and
10236 self.op.disk_template not in constants.DTS_NOT_LVM):
10237 raise errors.OpPrereqError("Cluster does not support lvm-based"
10238 " instances", errors.ECODE_STATE)
10240 if (self.op.hypervisor is None or
10241 self.op.hypervisor == constants.VALUE_AUTO):
10242 self.op.hypervisor = self.cfg.GetHypervisorType()
10244 cluster = self.cfg.GetClusterInfo()
10245 enabled_hvs = cluster.enabled_hypervisors
10246 if self.op.hypervisor not in enabled_hvs:
10247 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10249 (self.op.hypervisor, ",".join(enabled_hvs)),
10250 errors.ECODE_STATE)
10252 # Check tag validity
10253 for tag in self.op.tags:
10254 objects.TaggableObject.ValidateTag(tag)
10256 # check hypervisor parameter syntax (locally)
10257 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
10260 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10261 hv_type.CheckParameterSyntax(filled_hvp)
10262 self.hv_full = filled_hvp
10263 # check that we don't specify global parameters on an instance
10264 _CheckGlobalHvParams(self.op.hvparams)
10266 # fill and remember the beparams dict
10267 self.be_full = _ComputeFullBeParams(self.op, cluster)
10269 # build os parameters
10270 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)
10275 self._RevertToDefaults(cluster)
10278 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10279 self.proc.GetECId())
10281 # disk checks/pre-build
10282 default_vg = self.cfg.GetVGName()
10283 self.disks = _ComputeDisks(self.op, default_vg)
    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images
10299 if self.op.instance_name == self._old_instance_name:
10300 for idx, nic in enumerate(self.nics):
10301 if nic.mac == constants.VALUE_AUTO:
10302 nic_mac_ini = "nic%d_mac" % idx
10303 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10305 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10307 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10308 if self.op.ip_check:
10309 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10310 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10311 (self.check_ip, self.op.instance_name),
10312 errors.ECODE_NOTUNIQUE)
10314 #### mac address generation
10315 # By generating here the mac address both the allocator and the hooks get
10316 # the real final mac address rather than the 'auto' or 'generate' value.
10317 # There is a race condition between the generation and the instance object
10318 # creation, which means that we know the mac is valid now, but we're not
10319 # sure it will be when we actually add the instance. If things go bad
10320 # adding the instance will abort because of a duplicate mac, and the
10321 # creation job will fail.
10322 for nic in self.nics:
10323 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10324 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10328 if self.op.iallocator is not None:
10329 self._RunAllocator()
10331 # Release all unneeded node locks
10332 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10333 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10334 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10335 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10337 assert (self.owned_locks(locking.LEVEL_NODE) ==
10338 self.owned_locks(locking.LEVEL_NODE_RES)), \
10339 "Node locks differ from node resource locks"
10341 #### node related checks
10343 # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
10353 if not pnode.vm_capable:
10354 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10355 " '%s'" % pnode.name, errors.ECODE_STATE)
10357 self.secondaries = []
10359 # Fill in any IPs from IP pools. This must happen here, because we need to
10360 # know the nic's primary node, as specified by the iallocator
    for idx, nic in enumerate(self.nics):
      net = nic.network
      if net is not None:
10364 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10365 if netparams is None:
10366 raise errors.OpPrereqError("No netparams found for network"
10367 " %s. Propably not connected to"
10368 " node's %s nodegroup" %
10369 (net, self.pnode.name),
10370 errors.ECODE_INVAL)
10371 self.LogInfo("NIC/%d inherits netparams %s" %
10372 (idx, netparams.values()))
10373 nic.nicparams = dict(netparams)
10374 if nic.ip is not None:
          if nic.ip.lower() == constants.NIC_IP_POOL:
            try:
              nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10378 except errors.ReservationError:
10379 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10380 " from the address pool" % idx,
10381 errors.ECODE_STATE)
            self.LogInfo("Chose IP %s from network %s", nic.ip, net)
          else:
            try:
              self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10386 except errors.ReservationError:
10387 raise errors.OpPrereqError("IP address %s already in use"
10388 " or does not belong to network %s" %
10390 errors.ECODE_NOTUNIQUE)
10392 # net is None, ip None or given
10393 if self.op.conflicts_check:
10394 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10396 # mirror node verification
10397 if self.op.disk_template in constants.DTS_INT_MIRROR:
10398 if self.op.snode == pnode.name:
10399 raise errors.OpPrereqError("The secondary node cannot be the"
10400 " primary node", errors.ECODE_INVAL)
10401 _CheckNodeOnline(self, self.op.snode)
10402 _CheckNodeNotDrained(self, self.op.snode)
10403 _CheckNodeVmCapable(self, self.op.snode)
10404 self.secondaries.append(self.op.snode)
10406 snode = self.cfg.GetNodeInfo(self.op.snode)
10407 if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")
10413 nodenames = [pnode.name] + self.secondaries
10415 # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
                                  for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,
      }
10426 group_info = self.cfg.GetNodeGroup(pnode.group)
10427 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10428 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10429 if not self.op.ignore_ipolicy and res:
10430 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10431 (pnode.group, group_info.name, utils.CommaJoin(res)))
10432 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10434 if not self.adopt_disks:
10435 if self.op.disk_template == constants.DT_RBD:
10436 # _CheckRADOSFreeSpace() is just a placeholder.
10437 # Any function that checks prerequisites can be placed here.
10438 # Check if there is enough space on the RADOS cluster.
10439 _CheckRADOSFreeSpace()
10441 # Check lv size requirements, if not adopting
10442 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10443 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10445 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10446 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10447 disk[constants.IDISK_ADOPT])
10448 for disk in self.disks])
10449 if len(all_lvs) != len(self.disks):
10450 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10451 errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
10454 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10455 # to ReserveLV uses the same syntax
10456 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10457 except errors.ReservationError:
10458 raise errors.OpPrereqError("LV named %s used by another instance" %
10459 lv_name, errors.ECODE_NOTUNIQUE)
10461 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10462 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10464 node_lvs = self.rpc.call_lv_list([pnode.name],
10465 vg_names.payload.keys())[pnode.name]
10466 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10467 node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
10472 utils.CommaJoin(delta),
10473 errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
10477 " adopt: %s" % utils.CommaJoin(online_lvs),
10478 errors.ECODE_STATE)
10479 # update the size of disk based on what is found
10480 for dsk in self.disks:
10481 dsk[constants.IDISK_SIZE] = \
10482 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10483 dsk[constants.IDISK_ADOPT])][0]))
10485 elif self.op.disk_template == constants.DT_BLOCK:
10486 # Normalize and de-duplicate device paths
10487 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10488 for disk in self.disks])
10489 if len(all_disks) != len(self.disks):
10490 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10491 errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10496 " cannot be adopted" %
10497 (utils.CommaJoin(baddisks),
10498 constants.ADOPTABLE_BLOCKDEV_ROOT),
10499 errors.ECODE_INVAL)
10501 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10502 list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
10509 utils.CommaJoin(delta),
10510 errors.ECODE_INVAL)
10511 for dsk in self.disks:
10512 dsk[constants.IDISK_SIZE] = \
10513 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10515 # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
                                  for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,
      }
10527 group_info = self.cfg.GetNodeGroup(pnode.group)
10528 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10529 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10530 if not self.op.ignore_ipolicy and res:
10531 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10532 " policy: %s") % (pnode.group,
10533 utils.CommaJoin(res)),
10534 errors.ECODE_INVAL)
10536 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10538 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10539 # check OS parameters (remotely)
10540 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10542 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10544 # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
10548 "creating instance %s" % self.op.instance_name,
10549 self.be_full[constants.BE_MAXMEM],
10550 self.op.hypervisor)
10552 self.dry_run_result = list(nodenames)
10554 def Exec(self, feedback_fn):
10555 """Create and add the instance to the cluster.
10558 instance = self.op.instance_name
10559 pnode_name = self.pnode.name
10561 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10562 self.owned_locks(locking.LEVEL_NODE)), \
10563 "Node locks differ from node resource locks"
10564 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10566 ht_kind = self.op.hypervisor
10567 if ht_kind in constants.HTS_REQ_PORT:
10568 network_port = self.cfg.AllocatePort()
10570 network_port = None
10572 # This is ugly but we got a chicken-egg problem here
10573 # We can only take the group disk parameters, as the instance
10574 # has no disks yet (we are generating them right here).
10575 node = self.cfg.GetNodeInfo(pnode_name)
10576 nodegroup = self.cfg.GetNodeGroup(node.group)
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn,
                                  self.cfg.GetGroupDiskParams(nodegroup))
10588 iobj = objects.Instance(name=instance, os=self.op.os_type,
10589 primary_node=pnode_name,
10590 nics=self.nics, disks=disks,
10591 disk_template=self.op.disk_template,
10592 admin_state=constants.ADMINST_DOWN,
10593 network_port=network_port,
10594 beparams=self.op.beparams,
10595 hvparams=self.op.hvparams,
10596 hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)
10604 if self.adopt_disks:
10605 if self.op.disk_template == constants.DT_PLAIN:
10606 # rename LVs to the newly-generated names; we need to construct
10607 # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10611 rename_to.append(t_dsk.logical_id)
10612 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10613 self.cfg.SetDiskID(t_dsk, pnode_name)
10614 result = self.rpc.call_blockdev_rename(pnode_name,
10615 zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)
10631 self.cfg.AddInstance(iobj, self.proc.GetECId())
10633 # Declare that we don't want to remove the instance lock anymore, as we've
10634 # added the instance to the config
10635 del self.remove_locks[locking.LEVEL_INSTANCE]
10637 if self.op.mode == constants.INSTANCE_IMPORT:
10638 # Release unused nodes
10639 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")
10674 # Release all node resource locks
10675 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10677 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10678 # we need to set the disks ID to the primary node, since the
10679 # preceding code might or might have not done it, depending on
10680 # disk template and other options
10681 for disk in iobj.disks:
10682 self.cfg.SetDiskID(disk, pnode_name)
    if self.op.mode == constants.INSTANCE_CREATE:
      if not self.op.no_install:
        pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                      not self.op.wait_for_sync)
        if pause_sync:
          feedback_fn("* pausing disk sync to install instance OS")
          result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                            (iobj.disks,
                                                             iobj), True)
          for idx, success in enumerate(result.payload):
            if not success:
              logging.warn("pause-sync of instance %s for disk %d failed",
                           instance, idx)

        feedback_fn("* running the instance OS create scripts...")
        # FIXME: pass debug option from opcode to backend
        os_add_result = \
          self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                        self.op.debug_level)
        if pause_sync:
          feedback_fn("* resuming disk sync")
          result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                            (iobj.disks,
                                                             iobj), False)
          for idx, success in enumerate(result.payload):
            if not success:
              logging.warn("resume-sync of instance %s for disk %d failed",
                           instance, idx)

        os_add_result.Raise("Could not add os for instance %s"
                            " on node %s" % (instance, pnode_name))
    elif self.op.mode == constants.INSTANCE_IMPORT:
      feedback_fn("* running the instance OS import scripts...")

      transfers = []

      for idx, image in enumerate(self.src_images):
        if not image:
          continue

        # FIXME: pass debug option from opcode to backend
        dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                           constants.IEIO_FILE, (image, ),
                                           constants.IEIO_SCRIPT,
                                           (iobj.disks[idx], idx),
                                           None)
        transfers.append(dt)

      import_result = \
        masterd.instance.TransferInstanceData(self, feedback_fn,
                                              self.op.src_node, pnode_name,
                                              self.pnode.secondary_ip,
                                              iobj, transfers)
      if not compat.all(import_result):
10739 self.LogWarning("Some disks for instance %s on node %s were not"
10740 " imported successfully" % (instance, pnode_name))
10742 rename_from = self._old_instance_name
10744 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10745 feedback_fn("* preparing remote import...")
10746 # The source cluster will stop the instance before attempting to make
10747 # a connection. In some cases stopping an instance can take a long
      # time, hence the shutdown timeout is added to the connection
      # timeout.
      connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10751 self.op.source_shutdown_timeout)
10752 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
      assert iobj.primary_node == self.pnode.name
      disk_results = \
        masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10757 self.source_x509_ca,
10758 self._cds, timeouts)
10759 if not compat.all(disk_results):
10760 # TODO: Should the instance still be started, even if some disks
10761 # failed to import (valid for local imports, too)?
10762 self.LogWarning("Some disks for instance %s on node %s were not"
10763 " imported successfully" % (instance, pnode_name))
      rename_from = self.source_instance_name

    else:
      # also checked in the prereq part
      raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                   % self.op.mode)
10772 # Run rename script on newly imported instance
10773 assert iobj.name == instance
10774 feedback_fn("Running rename script for %s" % instance)
      result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                 rename_from,
                                                 self.op.debug_level)
10778 if result.fail_msg:
10779 self.LogWarning("Failed to run rename script for %s on node"
10780 " %s: %s" % (instance, pnode_name, result.fail_msg))
    assert not self.owned_locks(locking.LEVEL_NODE_RES)

    if self.op.start:
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")
10793 return list(iobj.all_nodes)
10796 class LUInstanceMultiAlloc(NoHooksLU):
10797 """Allocates multiple instances at the same time.
  def CheckArguments(self):
    """Check arguments.

    """
    nodes = []
    for inst in self.op.instances:
10808 if inst.iallocator is not None:
        raise errors.OpPrereqError("iallocator is not allowed to be set on"
                                   " instance objects", errors.ECODE_INVAL)
10811 nodes.append(bool(inst.pnode))
10812 if inst.disk_template in constants.DTS_INT_MIRROR:
10813 nodes.append(bool(inst.snode))
10815 has_nodes = compat.any(nodes)
10816 if compat.all(nodes) ^ has_nodes:
10817 raise errors.OpPrereqError("There are instance objects providing"
10818 " pnode/snode while others do not",
10819 errors.ECODE_INVAL)
10821 if self.op.iallocator is None:
10822 default_iallocator = self.cfg.GetDefaultIAllocator()
10823 if default_iallocator and has_nodes:
        self.op.iallocator = default_iallocator
      else:
        raise errors.OpPrereqError("No iallocator or nodes on the instances"
10827 " given and no cluster-wide default"
10828 " iallocator found; please specify either"
10829 " an iallocator or nodes on the instances"
10830 " or set a cluster-wide default iallocator",
10831 errors.ECODE_INVAL)
    dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
    if dups:
      raise errors.OpPrereqError("There are duplicate instance names: %s" %
10836 utils.CommaJoin(dups), errors.ECODE_INVAL)
10838 def ExpandNames(self):
10839 """Calculate the locks.
10842 self.share_locks = _ShareAll()
10843 self.needed_locks = {
10844 # iallocator will select nodes and even if no iallocator is used,
10845 # collisions with LUInstanceCreate should be avoided
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }
10849 if self.op.iallocator:
10850 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10851 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10853 if self.op.opportunistic_locking:
10854 self.opportunistic_locks[locking.LEVEL_NODE] = True
        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
    else:
      nodeslist = []
      for inst in self.op.instances:
10859 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10860 nodeslist.append(inst.pnode)
10861 if inst.snode is not None:
10862 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10863 nodeslist.append(inst.snode)
10865 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10866 # Lock resources of instance's primary and secondary nodes (copy to
10867 # prevent accidential modification)
10868 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10870 def CheckPrereq(self):
10871 """Check prerequisite.
10874 cluster = self.cfg.GetClusterInfo()
10875 default_vg = self.cfg.GetVGName()
10876 ec_id = self.proc.GetECId()
10878 if self.op.opportunistic_locking:
10879 # Only consider nodes for which a lock is held
      node_whitelist = self.owned_locks(locking.LEVEL_NODE)
    else:
      node_whitelist = None
    insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
                                         _ComputeNics(op, cluster, None,
                                                      self.cfg, ec_id),
                                         _ComputeFullBeParams(op, cluster),
                                         node_whitelist)
             for op in self.op.instances]
10891 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10892 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10894 ial.Run(self.op.iallocator)
10896 if not ial.success:
10897 raise errors.OpPrereqError("Can't compute nodes using"
10898 " iallocator '%s': %s" %
10899 (self.op.iallocator, ial.info),
10900 errors.ECODE_NORES)
10902 self.ia_result = ial.result
    if self.op.dry_run:
      self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
        constants.JOB_IDS_KEY: [],
        })
  def _ConstructPartialResult(self):
    """Constructs the partial result.

    """
    (allocatable, failed) = self.ia_result
    return {
      opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
        map(compat.fst, allocatable),
      opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
      }
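
  # Illustrative sketch (not part of the original module): with
  # ia_result = ([("inst1.example.com", ["node1"])], ["inst2.example.com"]),
  # the method above maps the "allocatable" key to ["inst1.example.com"] and
  # the "failed" key to ["inst2.example.com"].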
10920 def Exec(self, feedback_fn):
10921 """Executes the opcode.
    op2inst = dict((op.instance_name, op) for op in self.op.instances)
    (allocatable, failed) = self.ia_result

    jobs = []
    for (name, nodes) in allocatable:
      op = op2inst.pop(name)

      if len(nodes) > 1:
        (op.pnode, op.snode) = nodes
      else:
        (op.pnode,) = nodes

      jobs.append([op])
10938 missing = set(op2inst.keys()) - set(failed)
10939 assert not missing, \
10940 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
10942 return ResultWithJobs(jobs, **self._ConstructPartialResult())
def _CheckRADOSFreeSpace():
  """Compute disk size requirements inside the RADOS cluster.

  """
  # For the RADOS cluster we assume there is always enough space.
  pass
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
10964 self.share_locks = _ShareAll()
10965 self._ExpandAndLockInstance()
10967 def CheckPrereq(self):
10968 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
10973 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10974 assert self.instance is not None, \
10975 "Cannot retrieve locked instance %s" % self.op.instance_name
10976 _CheckNodeOnline(self, self.instance.primary_node)
10978 def Exec(self, feedback_fn):
10979 """Connect to the console of an instance
10982 instance = self.instance
10983 node = instance.primary_node
10985 node_insts = self.rpc.call_instance_list([node],
10986 [instance.hypervisor])[node]
10987 node_insts.Raise("Can't get node information from %s" % node)
10989 if instance.name not in node_insts.payload:
10990 if instance.admin_state == constants.ADMINST_UP:
10991 state = constants.INSTST_ERRORDOWN
10992 elif instance.admin_state == constants.ADMINST_DOWN:
10993 state = constants.INSTST_ADMINDOWN
10995 state = constants.INSTST_ADMINOFFLINE
10996 raise errors.OpExecError("Instance %s is not running (state %s)" %
10997 (instance.name, state))
10999 logging.debug("Connecting to console of %s on %s", instance.name, node)
11001 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11004 def _GetInstanceConsole(cluster, instance):
11005 """Returns console information for an instance.
11007 @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
11012 hyper = hypervisor.GetHypervisor(instance.hypervisor)
11013 # beparams and hvparams are passed separately, to avoid editing the
11014 # instance and then saving the defaults in the instance itself.
11015 hvparams = cluster.FillHV(instance)
11016 beparams = cluster.FillBE(instance)
11017 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11019 assert console.instance == instance.name
11020 assert console.Validate()
11022 return console.ToDict()
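
# Illustrative sketch (not part of the original module): for a KVM instance
# the dict returned above would typically look like (values made up):
#
#   {"instance": "inst1.example.com", "kind": constants.CONS_SSH,
#    "host": "node1.example.com", "user": "root", "command": ["..."]}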
11025 class LUInstanceReplaceDisks(LogicalUnit):
11026 """Replace the disks of an instance.
11029 HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
11033 def CheckArguments(self):
11034 """Check arguments.
11037 remote_node = self.op.remote_node
11038 ialloc = self.op.iallocator
11039 if self.op.mode == constants.REPLACE_DISK_CHG:
11040 if remote_node is None and ialloc is None:
11041 raise errors.OpPrereqError("When changing the secondary either an"
11042 " iallocator script must be used or the"
11043 " new node given", errors.ECODE_INVAL)
11045 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11047 elif remote_node is not None or ialloc is not None:
11048 # Not replacing the secondary
11049 raise errors.OpPrereqError("The iallocator and new node options can"
11050 " only be used when changing the"
11051 " secondary node", errors.ECODE_INVAL)
11053 def ExpandNames(self):
11054 self._ExpandAndLockInstance()
11056 assert locking.LEVEL_NODE not in self.needed_locks
11057 assert locking.LEVEL_NODE_RES not in self.needed_locks
11058 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11060 assert self.op.iallocator is None or self.op.remote_node is None, \
11061 "Conflicting options"
11063 if self.op.remote_node is not None:
11064 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11066 # Warning: do not remove the locking of the new secondary here
11067 # unless DRBD8.AddChildren is changed to work in parallel;
11068 # currently it doesn't since parallel invocations of
11069 # FindUnusedMinor will conflict
11070 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11071 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
11074 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11076 if self.op.iallocator is not None:
11077 # iallocator will select a new node in the same group
11078 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11079 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11081 self.needed_locks[locking.LEVEL_NODE_RES] = []
11083 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11084 self.op.iallocator, self.op.remote_node,
11085 self.op.disks, self.op.early_release,
11086 self.op.ignore_ipolicy)
11088 self.tasklets = [self.replacer]
11090 def DeclareLocks(self, level):
11091 if level == locking.LEVEL_NODEGROUP:
11092 assert self.op.remote_node is None
11093 assert self.op.iallocator is not None
11094 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11096 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11097 # Lock all groups used by instance optimistically; this requires going
11098 # via the node before it's locked, requiring verification later on
11099 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11100 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11102 elif level == locking.LEVEL_NODE:
11103 if self.op.iallocator is not None:
11104 assert self.op.remote_node is None
11105 assert not self.needed_locks[locking.LEVEL_NODE]
11106 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11108 # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = \
          [node_name
           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

        self._LockInstancesNodes()
11118 elif level == locking.LEVEL_NODE_RES:
11120 self.needed_locks[locking.LEVEL_NODE_RES] = \
11121 self.needed_locks[locking.LEVEL_NODE]
11123 def BuildHooksEnv(self):
11124 """Build hooks env.
11126 This runs on the master, the primary and all the secondaries.
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env
11138 def BuildHooksNodes(self):
11139 """Build hooks nodes.
11142 instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl
11151 def CheckPrereq(self):
11152 """Check prerequisites.
11155 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11156 self.op.iallocator is None)
11158 # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11163 return LogicalUnit.CheckPrereq(self)
11166 class TLReplaceDisks(Tasklet):
11167 """Replaces disks for an instance.
11169 Note: Locking is not within the scope of this class.
11172 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11173 disks, early_release, ignore_ipolicy):
11174 """Initializes this class.
11177 Tasklet.__init__(self, lu)
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.early_release = early_release
    self.ignore_ipolicy = ignore_ipolicy
11189 self.instance = None
11190 self.new_node = None
11191 self.target_node = None
11192 self.other_node = None
11193 self.remote_node_info = None
11194 self.node_secondary_ip = None
11197 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11198 """Compute a new secondary node using an IAllocator.
11201 req = iallocator.IAReqRelocate(name=instance_name,
11202 relocate_from=list(relocate_from))
11203 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11205 ial.Run(iallocator_name)
11207 if not ial.success:
11208 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11209 " %s" % (iallocator_name, ial.info),
11210 errors.ECODE_NORES)
11212 remote_node_name = ial.result[0]
11214 lu.LogInfo("Selected new secondary for instance '%s': %s",
11215 instance_name, remote_node_name)
11217 return remote_node_name
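
  # Result-shape sketch: for a relocate request, ial.result is a list holding
  # the chosen node name(s), e.g. ["node3.example.com"], which is why element
  # zero is returned above.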
11219 def _FindFaultyDisks(self, node_name):
11220 """Wrapper for L{_FindFaultyInstanceDisks}.
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)
11226 def _CheckDisksActivated(self, instance):
11227 """Checks if the instance disks are activated.
11229 @param instance: The instance to check disks
11230 @return: True if they are activated, False otherwise
    nodes = instance.all_nodes
    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)
        result = _BlockdevFind(self, node, dev, instance)
        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True
11249 def CheckPrereq(self):
11250 """Check prerequisites.
11252 This checks that the instance is in the cluster.
11255 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11256 assert instance is not None, \
11257 "Cannot retrieve locked instance %s" % self.instance_name
11259 if instance.disk_template != constants.DT_DRBD8:
11260 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11261 " instances", errors.ECODE_INVAL)
11263 if len(instance.secondary_nodes) != 1:
11264 raise errors.OpPrereqError("The instance has a strange layout,"
11265 " expected one secondary but found %d" %
11266 len(instance.secondary_nodes),
11267 errors.ECODE_FAULT)
11269 instance = self.instance
11270 secondary_node = instance.secondary_nodes[0]
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)
11278 if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11282 "Remote node '%s' is not locked" % remote_node
11284 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11285 assert self.remote_node_info is not None, \
11286 "Cannot retrieve locked node %s" % remote_node
11288 if remote_node == self.instance.primary_node:
11289 raise errors.OpPrereqError("The specified node is the primary node of"
11290 " the instance", errors.ECODE_INVAL)
11292 if remote_node == secondary_node:
11293 raise errors.OpPrereqError("The specified node is already the"
11294 " secondary node of the instance",
11295 errors.ECODE_INVAL)
11297 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11298 constants.REPLACE_DISK_CHG):
11299 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11300 errors.ECODE_INVAL)
11302 if self.mode == constants.REPLACE_DISK_AUTO:
11303 if not self._CheckDisksActivated(instance):
11304 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11305 " first" % self.instance_name,
11306 errors.ECODE_STATE)
11307 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11308 faulty_secondary = self._FindFaultyDisks(secondary_node)
11310 if faulty_primary and faulty_secondary:
11311 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11312 " one node and can not be repaired"
11313 " automatically" % self.instance_name,
11314 errors.ECODE_STATE)
      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
11321 elif faulty_secondary:
11322 self.disks = faulty_secondary
11323 self.target_node = secondary_node
11324 self.other_node = instance.primary_node
11325 check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
11332 if self.mode == constants.REPLACE_DISK_PRI:
11333 self.target_node = instance.primary_node
11334 self.other_node = secondary_node
11335 check_nodes = [self.target_node, self.other_node]
11337 elif self.mode == constants.REPLACE_DISK_SEC:
11338 self.target_node = secondary_node
11339 self.other_node = instance.primary_node
11340 check_nodes = [self.target_node, self.other_node]
11342 elif self.mode == constants.REPLACE_DISK_CHG:
11343 self.new_node = remote_node
11344 self.other_node = instance.primary_node
11345 self.target_node = secondary_node
11346 check_nodes = [self.new_node, self.other_node]
11348 _CheckNodeNotDrained(self.lu, remote_node)
11349 _CheckNodeVmCapable(self.lu, remote_node)
11351 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11352 assert old_node_info is not None
11353 if old_node_info.offline and not self.early_release:
11354 # doesn't make sense to delay the release
11355 self.early_release = True
11356 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11357 " early-release mode", secondary_node)
      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)
    # If not specified, all disks should be replaced
    if not self.disks:
      self.disks = range(len(self.instance.disks))

    # TODO: This is ugly, but right now we can't distinguish between internally
    # submitted opcodes and external ones. We should fix that.
11369 if self.remote_node_info:
      # We change the node; let's verify it still meets instance policy
11371 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11372 cluster = self.cfg.GetClusterInfo()
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              new_group_info)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11376 ignore=self.ignore_ipolicy)
11378 for node in check_nodes:
11379 _CheckNodeOnline(self.lu, node)
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)
11386 # Release unneeded node and node resource locks
11387 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11388 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11389 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11391 # Release any owned node group
11392 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11394 # Check whether disks are valid
11395 for disk_idx in self.disks:
11396 instance.FindDisk(disk_idx)
11398 # Get secondary node IP addresses
11399 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11400 in self.cfg.GetMultiNodeInfo(touched_nodes))
11402 def Exec(self, feedback_fn):
11403 """Execute disk replacement.
11405 This dispatches the disk replacement to the appropriate handler.
11409 # Verify owned locks before starting operation
11410 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11411 assert set(owned_nodes) == set(self.node_secondary_ip), \
11412 ("Incorrect node locks, owning %s, expected %s" %
11413 (owned_nodes, self.node_secondary_ip.keys()))
11414 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11415 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11416 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11418 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11419 assert list(owned_instances) == [self.instance_name], \
11420 "Instance '%s' not locked" % self.instance_name
11422 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11423 "Should not own any node group lock at this point"
    if not self.disks:
      feedback_fn("No disks need replacement for instance '%s'" %
                  self.instance.name)
      return
11430 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11431 (utils.CommaJoin(self.disks), self.instance.name))
11432 feedback_fn("Current primary node: %s" % self.instance.primary_node)
    feedback_fn("Current secondary node: %s" %
11434 utils.CommaJoin(self.instance.secondary_nodes))
11436 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11438 # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)
11443 # Should we replace the secondary node?
11444 if self.new_node is not None:
11445 fn = self._ExecDrbd8Secondary
    else:
      fn = self._ExecDrbd8DiskOnly
    try:
      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)
11456 assert not self.lu.owned_locks(locking.LEVEL_NODE)
    if __debug__:
      # Verify owned locks
11460 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11461 nodes = frozenset(self.node_secondary_ip)
11462 assert ((self.early_release and not owned_nodes) or
11463 (not self.early_release and not (set(owned_nodes) - nodes))), \
11464 ("Not owning the correct locks, early_release=%s, owned=%r,"
              " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result
11469 def _CheckVolumeGroup(self, nodes):
11470 self.lu.LogInfo("Checking volume groups")
11472 vgname = self.cfg.GetVGName()
11474 # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))
11486 def _CheckDisksExistence(self, nodes):
11487 # Check disk existence
11488 for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11494 self.cfg.SetDiskID(dev, node)
11496 result = _BlockdevFind(self, node, dev, self.instance)
11498 msg = result.fail_msg
11499 if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))
11505 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11506 for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))
11513 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11514 on_primary, ldisk=ldisk):
11515 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11516 " replace disks for instance %s" %
11517 (node_name, self.instance.name))
11519 def _CreateNewStorage(self, node_name):
11520 """Create new storage on the primary or secondary node.
11522 This is only used for same-node replaces, not for changing the
11523 secondary node, hence we don't want to modify the existing disk.
    iv_names = {}

    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
    for idx, dev in enumerate(disks):
      if idx not in self.disks:
        continue
11533 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11535 self.cfg.SetDiskID(dev, node_name)
11537 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11538 names = _GenerateUniqueNames(self.lu, lv_names)
11540 (data_disk, meta_disk) = dev.children
11541 vg_data = data_disk.logical_id[0]
11542 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11543 logical_id=(vg_data, names[0]),
11544 params=data_disk.params)
11545 vg_meta = meta_disk.logical_id[0]
11546 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11547 size=constants.DRBD_META_SIZE,
11548 logical_id=(vg_meta, names[1]),
11549 params=meta_disk.params)
11551 new_lvs = [lv_data, lv_meta]
11552 old_lvs = [child.Copy() for child in dev.children]
11553 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11555 # we pass force_create=True to force the LVM creation
11556 for new_lv in new_lvs:
11557 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
                             _GetInstanceInfoText(self.instance), False)

    return iv_names
11562 def _CheckDevices(self, node_name, iv_names):
11563 for name, (dev, _, _) in iv_names.iteritems():
11564 self.cfg.SetDiskID(dev, node_name)
11566 result = _BlockdevFind(self, node_name, dev, self.instance)
11568 msg = result.fail_msg
11569 if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))
11575 if result.payload.is_degraded:
11576 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11578 def _RemoveOldStorage(self, node_name, iv_names):
11579 for name, (_, old_lvs, _) in iv_names.iteritems():
11580 self.lu.LogInfo("Remove logical volumes for %s", name)
      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s", msg,
11588 hint="remove unused LVs manually")
11590 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11591 """Replace a disk on the primary or secondary for DRBD 8.
11593 The algorithm for replace is quite complicated:
11595 1. for each disk to be replaced:
11597 1. create new LVs on the target node with unique names
11598 1. detach old LVs from the drbd device
11599 1. rename old LVs to name_replaced.<time_t>
11600 1. rename new LVs to old LVs
11601 1. attach the new LVs (with the old names now) to the drbd device
11603 1. wait for sync across all devices
11605 1. for each modified disk:
      1. remove old LVs (which have the name name_replaced.<time_t>)
11609 Failures are not very well handled.
11614 # Step: check device activation
11615 self.lu.LogStep(1, steps_total, "Check device existence")
11616 self._CheckDisksExistence([self.other_node, self.target_node])
11617 self._CheckVolumeGroup([self.target_node, self.other_node])
11619 # Step: check other node consistency
11620 self.lu.LogStep(2, steps_total, "Check peer consistency")
11621 self._CheckDisksConsistency(self.other_node,
11622 self.other_node == self.instance.primary_node,
11625 # Step: create new storage
11626 self.lu.LogStep(3, steps_total, "Allocate new storage")
11627 iv_names = self._CreateNewStorage(self.target_node)
11629 # Step: for each lv, detach+rename*2+attach
11630 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11631 for dev, old_lvs, new_lvs in iv_names.itervalues():
11632 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
11636 result.Raise("Can't detach drbd from local storage on node"
11637 " %s for device %s" % (self.target_node, dev.iv_name))
11639 #cfg.Update(instance)
11641 # ok, we created the new LVs, so now we know we have the needed
11642 # storage; as such, we proceed on the target node to rename
11643 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11644 # using the assumption that logical_id == physical_id (which in
11645 # turn is the unique_id on that node)
11647 # FIXME(iustin): use a better name for the replaced LVs
11648 temp_suffix = int(time.time())
11649 ren_fn = lambda d, suff: (d.physical_id[0],
11650 d.physical_id[1] + "_replaced-%s" % suff)
11652 # Build the rename list based on what LVs exist on the node
11653 rename_old_to_new = []
11654 for to_ren in old_lvs:
11655 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11656 if not result.fail_msg and result.payload:
11658 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11660 self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
11663 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11665 # Now we rename the new LVs to the old LVs
11666 self.lu.LogInfo("Renaming the new LVs on the target node")
11667 rename_new_to_old = [(new, old.physical_id)
11668 for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
11671 result.Raise("Can't rename new LVs on node %s" % self.target_node)
      # Intermediate steps of in-memory modifications
11674 for old, new in zip(old_lvs, new_lvs):
11675 new.logical_id = old.logical_id
11676 self.cfg.SetDiskID(new, self.target_node)
11678 # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
11681 for disk in old_lvs:
11682 disk.logical_id = ren_fn(disk, temp_suffix)
11683 self.cfg.SetDiskID(disk, self.target_node)
11685 # Now that the new lvs have the old name, we can add them to the device
11686 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
11687 result = self.rpc.call_blockdev_addchildren(self.target_node,
11688 (dev, self.instance), new_lvs)
11689 msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11700 cstep = itertools.count(5)
11702 if self.early_release:
11703 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11704 self._RemoveOldStorage(self.target_node, iv_names)
11705 # TODO: Check if releasing locks early still makes sense
11706 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11708 # Release all resource locks except those used by the instance
11709 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11710 keep=self.node_secondary_ip.keys())
11712 # Release all node locks while waiting for sync
11713 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11715 # TODO: Can the instance lock be downgraded here? Take the optional disk
11716 # shutdown in the caller into consideration.
11719 # This can fail as the old devices are degraded and _WaitForSync
11720 # does a combined result over all disks, so we don't check its return value
11721 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11722 _WaitForSync(self.lu, self.instance)
11724 # Check all devices manually
11725 self._CheckDevices(self.instance.primary_node, iv_names)
11727 # Step: remove old storage
11728 if not self.early_release:
11729 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11730 self._RemoveOldStorage(self.target_node, iv_names)
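
  # Illustrative sketch of the rename dance in _ExecDrbd8DiskOnly above
  # (hypothetical LV names): with old LV "xenvg/aaa.disk0_data" and new LV
  # "xenvg/bbb.disk0_data",
  #   rename_old_to_new: "aaa.disk0_data" -> "aaa.disk0_data_replaced-<time_t>"
  #   rename_new_to_old: "bbb.disk0_data" -> "aaa.disk0_data"
  # so the DRBD device reattaches its children under the pre-existing names
  # while the detached LVs keep a unique suffix until their removal.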
11732 def _ExecDrbd8Secondary(self, feedback_fn):
11733 """Replace the secondary node for DRBD 8.
11735 The algorithm for replace is quite complicated:
11736 - for all disks of the instance:
11737 - create new LVs on the new node with same names
11738 - shutdown the drbd device on the old secondary
11739 - disconnect the drbd network on the primary
11740 - create the drbd device on the new secondary
11741 - network attach the drbd on the primary, using an artifice:
11742 the drbd code for Attach() will connect to the network if it
11743 finds a device which is connected to the good local disks but
11744 not network enabled
11745 - wait for sync across all devices
11746 - remove all disks from the old secondary
11748 Failures are not very well handled.
11753 pnode = self.instance.primary_node
11755 # Step: check device activation
11756 self.lu.LogStep(1, steps_total, "Check device existence")
11757 self._CheckDisksExistence([self.instance.primary_node])
11758 self._CheckVolumeGroup([self.instance.primary_node])
11760 # Step: check other node consistency
11761 self.lu.LogStep(2, steps_total, "Check peer consistency")
11762 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11764 # Step: create new storage
11765 self.lu.LogStep(3, steps_total, "Allocate new storage")
11766 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11767 for idx, dev in enumerate(disks):
11768 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11769 (self.new_node, idx))
11770 # we pass force_create=True to force LVM creation
11771 for new_lv in dev.children:
11772 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11773 True, _GetInstanceInfoText(self.instance), False)
    # Step 4: drbd minors and drbd setup changes
11776 # after this, we must manually remove the drbd minors on both the
11777 # error and the success paths
11778 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11779 minors = self.cfg.AllocateDRBDMinor([self.new_node
11780 for dev in self.instance.disks],
11781 self.instance.name)
11782 logging.debug("Allocated minors %r", minors)
11785 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11786 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11787 (self.new_node, idx))
11788 # create new devices on new_node; note that we create two IDs:
11789 # one without port, so the drbd will be activated without
11790 # networking information on the new node at this stage, and one
11791 # with network, for the latter activation in step 4
11792 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11793 if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2
11799 new_alone_id = (self.instance.primary_node, self.new_node, None,
11800 p_minor, new_minor, o_secret)
11801 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11802 p_minor, new_minor, o_secret)
11804 iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
11807 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11808 logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size,
                              params={})
      (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
                                             self.cfg)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
                              anno_new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise
11822 # We have new devices, shutdown the drbd on the old secondary
11823 for idx, dev in enumerate(self.instance.disks):
11824 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
11825 self.cfg.SetDiskID(dev, self.target_node)
11826 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11827 (dev, self.instance)).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
11831 hint=("Please cleanup this device manually as"
11832 " soon as possible"))
11834 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11835 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11836 self.instance.disks)[pnode]
11838 msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
11842 raise errors.OpExecError("Can't detach the disks from the network on"
11843 " old node: %s" % (msg,))
11845 # if we managed to detach at least one, we update all the disks of
11846 # the instance to point to the new secondary
11847 self.lu.LogInfo("Updating instance configuration")
11848 for dev, _, new_logical_id in iv_names.itervalues():
11849 dev.logical_id = new_logical_id
11850 self.cfg.SetDiskID(dev, self.instance.primary_node)
11852 self.cfg.Update(self.instance, feedback_fn)
11854 # Release all node locks (the configuration has been updated)
11855 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11857 # and now perform the drbd attach
11858 self.lu.LogInfo("Attaching primary drbds to new secondary"
11859 " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           (self.instance.disks, self.instance),
                                           self.instance.name,
                                           False)
11866 for to_node, to_result in result.items():
11867 msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
11874 cstep = itertools.count(5)
11876 if self.early_release:
11877 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11878 self._RemoveOldStorage(self.target_node, iv_names)
11879 # TODO: Check if releasing locks early still makes sense
11880 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11882 # Release all resource locks except those used by the instance
11883 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11884 keep=self.node_secondary_ip.keys())
11886 # TODO: Can the instance lock be downgraded here? Take the optional disk
11887 # shutdown in the caller into consideration.
11890 # This can fail as the old devices are degraded and _WaitForSync
11891 # does a combined result over all disks, so we don't check its return value
11892 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11893 _WaitForSync(self.lu, self.instance)
11895 # Check all devices manually
11896 self._CheckDevices(self.instance.primary_node, iv_names)
11898 # Step: remove old storage
11899 if not self.early_release:
11900 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11901 self._RemoveOldStorage(self.target_node, iv_names)
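
  # Sketch of the logical_id rewrite in _ExecDrbd8Secondary above
  # (hypothetical values), with "node1" as primary, "node2" the old and
  # "node3" the new secondary:
  #   old dev.logical_id: ("node1", "node2", 11000, 0, 1, "secret")
  #   new_alone_id:       ("node1", "node3", None,  0, new_minor, "secret")
  #   new_net_id:         ("node1", "node3", 11000, 0, new_minor, "secret")
  # The port-less ID activates the device standalone on the new node; the
  # networked ID is what gets written to the configuration for the attach.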
11904 class LURepairNodeStorage(NoHooksLU):
11905 """Repairs the volume group on a node.
11910 def CheckArguments(self):
11911 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11913 storage_type = self.op.storage_type
11915 if (constants.SO_FIX_CONSISTENCY not in
11916 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11917 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11918 " repaired" % storage_type,
11919 errors.ECODE_INVAL)
11921 def ExpandNames(self):
11922 self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }
11926 def _CheckFaultyDisks(self, instance, node_name):
11927 """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
11931 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11932 " node '%s'" % (instance.name, node_name),
11933 errors.ECODE_STATE)
11934 except errors.OpPrereqError, err:
11935 if self.op.ignore_consistency:
        self.LogWarning(str(err.args[0]))
      else:
        raise
11940 def CheckPrereq(self):
11941 """Check prerequisites.
11944 # Check whether any instance on this node has faulty disks
11945 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if inst.admin_state != constants.ADMINST_UP:
        continue
11948 check_nodes = set(inst.all_nodes)
11949 check_nodes.discard(self.op.node_name)
11950 for inst_node_name in check_nodes:
11951 self._CheckFaultyDisks(inst, inst_node_name)
11953 def Exec(self, feedback_fn):
11954 feedback_fn("Repairing storage unit '%s' on %s ..." %
11955 (self.op.name, self.op.node_name))
11957 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11958 result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
11962 result.Raise("Failed to repair storage unit '%s' on %s" %
11963 (self.op.name, self.op.node_name))
11966 class LUNodeEvacuate(NoHooksLU):
11967 """Evacuates instances off a list of nodes.
11972 _MODE2IALLOCATOR = {
11973 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11974 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
11977 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11978 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11979 constants.IALLOCATOR_NEVAC_MODES)
11981 def CheckArguments(self):
11982 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11984 def ExpandNames(self):
11985 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11987 if self.op.remote_node is not None:
11988 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11989 assert self.op.remote_node
11991 if self.op.remote_node == self.op.node_name:
11992 raise errors.OpPrereqError("Can not use evacuated node as a new"
11993 " secondary node", errors.ECODE_INVAL)
11995 if self.op.mode != constants.NODE_EVAC_SEC:
11996 raise errors.OpPrereqError("Without the use of an iallocator only"
11997 " secondary instances can be evacuated",
11998 errors.ECODE_INVAL)
12001 self.share_locks = _ShareAll()
12002 self.needed_locks = {
12003 locking.LEVEL_INSTANCE: [],
12004 locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }
12008 # Determine nodes (via group) optimistically, needs verification once locks
12009 # have been acquired
12010 self.lock_nodes = self._DetermineNodes()
12012 def _DetermineNodes(self):
12013 """Gets the list of nodes to operate on.
12016 if self.op.remote_node is None:
12017 # Iallocator will choose any node(s) in the same group
12018 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])
12022 # Determine nodes to be locked
12023 return set([self.op.node_name]) | group_nodes
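
  # Sketch: evacuating "node1" whose group also contains "node2" and "node3"
  # returns {"node1", "node2", "node3"}; with an explicit remote node only
  # that node is added, e.g. {"node1", "node4"}.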
12025 def _DetermineInstances(self):
12026 """Builds list of instances to operate on.
12029 assert self.op.mode in constants.NODE_EVAC_MODES
12031 if self.op.mode == constants.NODE_EVAC_PRI:
12032 # Primary instances only
12033 inst_fn = _GetNodePrimaryInstances
12034 assert self.op.remote_node is None, \
12035 "Evacuating primary instances requires iallocator"
12036 elif self.op.mode == constants.NODE_EVAC_SEC:
12037 # Secondary instances only
12038 inst_fn = _GetNodeSecondaryInstances
    else:
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
12043 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12045 raise errors.OpPrereqError("Due to an issue with the iallocator"
12046 " interface it is not possible to evacuate"
12047 " all instances at once; specify explicitly"
12048 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)
12052 return inst_fn(self.cfg, self.op.node_name)
12054 def DeclareLocks(self, level):
12055 if level == locking.LEVEL_INSTANCE:
12056 # Lock instances optimistically, needs verification once node and group
12057 # locks have been acquired
12058 self.needed_locks[locking.LEVEL_INSTANCE] = \
12059 set(i.name for i in self._DetermineInstances())
12061 elif level == locking.LEVEL_NODEGROUP:
12062 # Lock node groups for all potential target nodes optimistically, needs
12063 # verification once nodes have been acquired
12064 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12065 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12067 elif level == locking.LEVEL_NODE:
12068 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12070 def CheckPrereq(self):
12072 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12073 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12074 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12076 need_nodes = self._DetermineNodes()
12078 if not owned_nodes.issuperset(need_nodes):
12079 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12080 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
12083 (self.op.node_name,
12084 utils.CommaJoin(need_nodes),
12085 utils.CommaJoin(owned_nodes)),
12086 errors.ECODE_STATE)
12088 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12089 if owned_groups != wanted_groups:
12090 raise errors.OpExecError("Node groups changed since locks were acquired,"
12091 " current groups are '%s', used to be '%s';"
12092 " retry the operation" %
12093 (utils.CommaJoin(wanted_groups),
12094 utils.CommaJoin(owned_groups)))
12096 # Determine affected instances
12097 self.instances = self._DetermineInstances()
12098 self.instance_names = [i.name for i in self.instances]
12100 if set(self.instance_names) != owned_instances:
12101 raise errors.OpExecError("Instances on node '%s' changed since locks"
12102 " were acquired, current instances are '%s',"
12103 " used to be '%s'; retry the operation" %
12104 (self.op.node_name,
12105 utils.CommaJoin(self.instance_names),
12106 utils.CommaJoin(owned_instances)))
12108 if self.instance_names:
12109 self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)
12116 if self.op.remote_node is not None:
12117 for i in self.instances:
12118 if i.primary_node == self.op.remote_node:
12119 raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " a secondary" %
12122 (self.op.remote_node, i.name),
12123 errors.ECODE_INVAL)
12125 def Exec(self, feedback_fn):
12126 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12128 if not self.instance_names:
      # No instances to evacuate
      jobs = []
12132 elif self.op.iallocator is not None:
12133 # TODO: Implement relocation to other group
12134 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12135 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12136 instances=list(self.instance_names))
12137 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12139 ial.Run(self.op.iallocator)
12141 if not ial.success:
12142 raise errors.OpPrereqError("Can't compute node evacuation using"
12143 " iallocator '%s': %s" %
12144 (self.op.iallocator, ial.info),
12145 errors.ECODE_NORES)
12147 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12149 elif self.op.remote_node is not None:
12150 assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names]
    else:
      raise errors.ProgrammerError("No iallocator or remote node")
12162 return ResultWithJobs(jobs)
12165 def _SetOpEarlyRelease(early_release, op):
12166 """Sets C{early_release} flag on opcodes if available.
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op
12177 def _NodeEvacDest(use_nodes, group, nodes):
12178 """Returns group or nodes depending on caller's choice.
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group
12187 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12188 """Unpacks the result of change-group and node-evacuate iallocator requests.
12190 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12191 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12193 @type lu: L{LogicalUnit}
12194 @param lu: Logical unit instance
12195 @type alloc_result: tuple/list
12196 @param alloc_result: Result from iallocator
12197 @type early_release: bool
12198 @param early_release: Whether to release locks early if possible
12199 @type use_nodes: bool
12200 @param use_nodes: Whether to display node names instead of groups
12203 (moved, failed, jobs) = alloc_result
  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
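
# Shape sketch of alloc_result (hypothetical values):
#   moved  = [("inst1", "group1-uuid", ["node2"])]
#   failed = [("inst2", "not enough memory")]
#   jobs   = [[op1, op2], [op3]]  # serialized opcodes, one inner list per job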
12222 def _DiskSizeInBytesToMebibytes(lu, size):
12223 """Converts a disk size in bytes to mebibytes.
12225 Warns and rounds up if the size isn't an even multiple of 1 MiB.
  (mib, remainder) = divmod(size, 1024 * 1024)
  if remainder != 0:
    lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
                  " to not overwrite existing data (%s bytes will not be"
                  " wiped)", (1024 * 1024) - remainder)
    mib += 1

  return mib
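
# Worked example: a size of 1073741825 bytes (1 GiB plus one byte) gives
# divmod(1073741825, 1048576) == (1024, 1), so the function warns that
# 1048575 bytes will not be wiped and returns 1025.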
12239 class LUInstanceGrowDisk(LogicalUnit):
12240 """Grow a disk of an instance.
12243 HPATH = "disk-grow"
12244 HTYPE = constants.HTYPE_INSTANCE
12247 def ExpandNames(self):
12248 self._ExpandAndLockInstance()
12249 self.needed_locks[locking.LEVEL_NODE] = []
12250 self.needed_locks[locking.LEVEL_NODE_RES] = []
12251 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12252 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12254 def DeclareLocks(self, level):
12255 if level == locking.LEVEL_NODE:
12256 self._LockInstancesNodes()
12257 elif level == locking.LEVEL_NODE_RES:
12259 self.needed_locks[locking.LEVEL_NODE_RES] = \
12260 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12262 def BuildHooksEnv(self):
12263 """Build hooks env.
12265 This runs on the master, the primary and all the secondaries.
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      "ABSOLUTE": self.op.absolute,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env
12276 def BuildHooksNodes(self):
12277 """Build hooks nodes.
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
12283 def CheckPrereq(self):
12284 """Check prerequisites.
12286 This checks that the instance is in the cluster.
12289 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12290 assert instance is not None, \
12291 "Cannot retrieve locked instance %s" % self.op.instance_name
12292 nodenames = list(instance.all_nodes)
12293 for node in nodenames:
12294 _CheckNodeOnline(self, node)
12296 self.instance = instance
12298 if instance.disk_template not in constants.DTS_GROWABLE:
12299 raise errors.OpPrereqError("Instance's disk layout does not support"
12300 " growing", errors.ECODE_INVAL)
12302 self.disk = instance.FindDisk(self.op.disk)
12304 if self.op.absolute:
12305 self.target = self.op.amount
12306 self.delta = self.target - self.disk.size
      if self.delta < 0:
        raise errors.OpPrereqError("Requested size (%s) is smaller than "
12309 "current disk size (%s)" %
12310 (utils.FormatUnit(self.target, "h"),
12311 utils.FormatUnit(self.disk.size, "h")),
12312 errors.ECODE_STATE)
    else:
      self.delta = self.op.amount
      self.target = self.disk.size + self.delta
      if self.delta < 0:
        raise errors.OpPrereqError("Requested increment (%s) is negative" %
12318 utils.FormatUnit(self.delta, "h"),
12319 errors.ECODE_INVAL)
12321 if instance.disk_template not in (constants.DT_FILE,
12322 constants.DT_SHARED_FILE,
                                      constants.DT_RBD):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
12327 self.disk.ComputeGrowth(self.delta))
12329 def Exec(self, feedback_fn):
12330 """Execute disk grow.
    instance = self.instance
    disk = self.disk
12336 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12337 assert (self.owned_locks(locking.LEVEL_NODE) ==
12338 self.owned_locks(locking.LEVEL_NODE_RES))
12340 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12342 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")
12346 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12347 (self.op.disk, instance.name,
12348 utils.FormatUnit(self.delta, "h"),
12349 utils.FormatUnit(self.target, "h")))
12351 # First run all grow ops in dry-run mode
12352 for node in instance.all_nodes:
12353 self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True, True)
      result.Raise("Dry-run grow request failed on node %s" % node)
    if wipe_disks:
      # Get disk size from primary node for wiping
12360 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12361 result.Raise("Failed to retrieve disk size from node '%s'" %
12362 instance.primary_node)
12364 (disk_size_in_bytes, ) = result.payload
12366 if disk_size_in_bytes is None:
12367 raise errors.OpExecError("Failed to retrieve disk size from primary"
12368 " node '%s'" % instance.primary_node)
12370 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12372 assert old_disk_size >= disk.size, \
12373 ("Retrieved disk size too small (got %s, should be at least %s)" %
12374 (old_disk_size, disk.size))
    else:
      old_disk_size = None
12378 # We know that (as far as we can test) operations across different
12379 # nodes will succeed, time to run it for real on the backing storage
12380 for node in instance.all_nodes:
12381 self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False, True)
      result.Raise("Grow request failed on node %s" % node)
12386 # And now execute it for logical storage, on the primary node
12387 node = instance.primary_node
12388 self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                         False, False)
    result.Raise("Grow request failed on node %s" % node)
12393 disk.RecordGrow(self.delta)
12394 self.cfg.Update(instance, feedback_fn)
12396 # Changes have been recorded, release node lock
12397 _ReleaseLocks(self, locking.LEVEL_NODE)
12399 # Downgrade lock while waiting for sync
12400 self.glm.downgrade(locking.LEVEL_INSTANCE)
12402 assert wipe_disks ^ (old_disk_size is None)
    if wipe_disks:
      assert instance.disks[self.op.disk] == disk
12407 # Wipe newly added disk space
12408 _WipeDisks(self, instance,
12409 disks=[(self.op.disk, disk, old_disk_size)])
12411 if self.op.wait_for_sync:
12412 disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.LogWarning("Disk syncing has not returned a good status; check"
                        " the instance")
12416 if instance.admin_state != constants.ADMINST_UP:
12417 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12418 elif instance.admin_state != constants.ADMINST_UP:
12419 self.LogWarning("Not shutting down the disk even if the instance is"
12420 " not supposed to be running because no wait for"
12421 " sync mode was requested")
12423 assert self.owned_locks(locking.LEVEL_NODE_RES)
12424 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12427 class LUInstanceQueryData(NoHooksLU):
12428 """Query runtime instance data.
12433 def ExpandNames(self):
12434 self.needed_locks = {}
12436 # Use locking if requested or when non-static information is wanted
12437 if not (self.op.static or self.op.use_locking):
12438 self.LogWarning("Non-static data requested, locks need to be acquired")
12439 self.op.use_locking = True
12441 if self.op.instances or not self.op.use_locking:
12442 # Expand instance names right here
12443 self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
12446 self.wanted_names = None
12448 if self.op.use_locking:
12449 self.share_locks = _ShareAll()
12451 if self.wanted_names is None:
12452 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12456 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12457 self.needed_locks[locking.LEVEL_NODE] = []
12458 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12460 def DeclareLocks(self, level):
12461 if self.op.use_locking:
12462 if level == locking.LEVEL_NODEGROUP:
12463 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12465 # Lock all groups used by instances optimistically; this requires going
12466 # via the node before it's locked, requiring verification later on
12467 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12468 frozenset(group_uuid
                    for instance_name in owned_instances
                    for group_uuid in
                      self.cfg.GetInstanceNodeGroups(instance_name))
12473 elif level == locking.LEVEL_NODE:
12474 self._LockInstancesNodes()
12476 def CheckPrereq(self):
12477 """Check prerequisites.
12479 This only checks the optional instance list against the existing names.
12482 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12483 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12484 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12486 if self.wanted_names is None:
12487 assert self.op.use_locking, "Locking was not used"
12488 self.wanted_names = owned_instances
12490 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12492 if self.op.use_locking:
12493 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)
    else:
      assert not (owned_instances or owned_groups or owned_nodes)
12498 self.wanted_instances = instances.values()
12500 def _ComputeBlockdevStatus(self, node, instance, dev):
12501 """Returns the status of a block device
    if self.op.static or not node:
      return None
12507 self.cfg.SetDiskID(dev, node)
    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None
    result.Raise("Can't compute disk status for %s" % instance.name)
    status = result.payload
    if status is None:
      return None
12519 return (status.dev_path, status.major, status.minor,
12520 status.sync_percent, status.estimated_time,
12521 status.is_degraded, status.ldisk_status)
12523 def _ComputeDiskStatus(self, instance, snode, dev):
12524 """Compute block device status.
12527 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12529 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12531 def _ComputeDiskStatusInner(self, instance, snode, dev):
12532 """Compute block device status.
12534 @attention: The device has to be annotated already.
12537 if dev.dev_type in constants.LDS_DRBD:
12538 # we change the snode then (otherwise we use the one passed in)
12539 if dev.logical_id[0] == instance.primary_node:
12540 snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance, dev)
12546 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
    dev_children = map(compat.partial(self._ComputeDiskStatusInner,
                                      instance, snode),
                       dev.children)

    return {
12556 "iv_name": dev.iv_name,
12557 "dev_type": dev.dev_type,
12558 "logical_id": dev.logical_id,
12559 "physical_id": dev.physical_id,
12560 "pstatus": dev_pstatus,
12561 "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }
12567 def Exec(self, feedback_fn):
12568 """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()
12573 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12574 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12576 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12577 for node in nodes.values()))
12579 group2name_fn = lambda uuid: groups[uuid].name
12581 for instance in self.wanted_instances:
12582 pnode = nodes[instance.primary_node]
12584 if self.op.static or pnode.offline:
12585 remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
12588 " information only for instance %s" %
12589 (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
12594 remote_info.Raise("Error checking node %s" % instance.primary_node)
12595 remote_info = remote_info.payload
12596 if remote_info and "state" in remote_info:
12597 remote_state = "up"
      else:
        if instance.admin_state == constants.ADMINST_UP:
          remote_state = "down"
        else:
          remote_state = instance.admin_state
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)
12607 snodes_group_uuids = [nodes[snode_name].group
12608 for snode_name in instance.secondary_nodes]
12610 result[instance.name] = {
12611 "name": instance.name,
12612 "config_state": instance.admin_state,
12613 "run_state": remote_state,
12614 "pnode": instance.primary_node,
12615 "pnode_group_uuid": pnode.group,
12616 "pnode_group_name": group2name_fn(pnode.group),
12617 "snodes": instance.secondary_nodes,
12618 "snodes_group_uuids": snodes_group_uuids,
12619 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12621 # this happens to be the same format used for hooks
12622 "nics": _NICListToTuple(self, instance.nics),
      "disk_template": instance.disk_template,
      "disks": disks,
12625 "hypervisor": instance.hypervisor,
12626 "network_port": instance.network_port,
12627 "hv_instance": instance.hvparams,
12628 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12629 "be_instance": instance.beparams,
12630 "be_actual": cluster.FillBE(instance),
12631 "os_instance": instance.osparams,
12632 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12633 "serial_no": instance.serial_no,
12634 "mtime": instance.mtime,
12635 "ctime": instance.ctime,
      "uuid": instance.uuid,
      }

    return result
12642 def PrepareContainerMods(mods, private_fn):
12643 """Prepares a list of container modifications by adding a private data field.
12645 @type mods: list of tuples; (operation, index, parameters)
12646 @param mods: List of modifications
12647 @type private_fn: callable or None
12648 @param private_fn: Callable for constructing a private data field for a
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]
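
# Usage sketch (hypothetical modification list): every entry gains a freshly
# constructed private object, e.g.
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, {})],
#                               _InstNicModPrivate)
#   # -> [(constants.DDM_ADD, -1, {}, <_InstNicModPrivate instance>)]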
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks
12663 _TApplyContModsCbChanges = \
12664 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))
12670 def ApplyContainerMods(kind, container, chgdesc, mods,
12671 create_fn, modify_fn, remove_fn):
12672 """Applies descriptions in C{mods} to C{container}.
12675 @param kind: One-word item description
12676 @type container: list
12677 @param container: Container to modify
12678 @type chgdesc: None or list
12679 @param chgdesc: List of applied changes
12681 @param mods: Modifications as returned by L{PrepareContainerMods}
12682 @type create_fn: callable
12683 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12684 receives absolute item index, parameters and private data object as added
12685 by L{PrepareContainerMods}, returns tuple containing new item and changes
12687 @type modify_fn: callable
12688 @param modify_fn: Callback for modifying an existing item
12689 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12690 and private data object as added by L{PrepareContainerMods}, returns
12692 @type remove_fn: callable
12693 @param remove_fn: Callback on removing item; receives absolute item index,
12694 item and private data object as added by L{PrepareContainerMods}
12697 for (op, idx, params, private) in mods:
      if idx == -1:
        # Append
        absidx = len(container) - 1
      elif idx < 0:
        raise IndexError("Not accepting negative indices other than -1")
      elif idx > len(container):
        raise IndexError("Got %s index %s, but there are only %s" %
                         (kind, idx, len(container)))
      else:
        absidx = idx

      changes = None
12711 if op == constants.DDM_ADD:
        # Calculate where item will be added
        if idx == -1:
          addidx = len(container)
        else:
          addidx = idx
        if create_fn is None:
          item = params
        else:
          (item, changes) = create_fn(addidx, params, private)
        if idx == -1:
          container.append(item)
        else:
          assert idx <= len(container)
          # list.insert does so before the specified index
          container.insert(idx, item)
      else:
        # Retrieve existing item
        try:
          item = container[absidx]
        except IndexError:
          raise IndexError("Invalid %s index %s" % (kind, idx))
12737 if op == constants.DDM_REMOVE:
12740 if remove_fn is not None:
12741 remove_fn(absidx, item, private)
12743 changes = [("%s/%s" % (kind, absidx), "remove")]
12745 assert container[absidx] == item
12746 del container[absidx]
12747 elif op == constants.DDM_MODIFY:
12748 if modify_fn is not None:
12749 changes = modify_fn(absidx, item, params, private)
        else:
          raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12753 assert _TApplyContModsCbChanges(changes)
12755 if not (chgdesc is None or changes is None):
12756 chgdesc.extend(changes)
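
# Worked sketch on plain data (no LU involved): an add at index -1 appends,
# with the create callback returning the new item and a change list:
#   container = ["a"]
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, None)], None)
#   ApplyContainerMods("test", container, None, mods,
#                      lambda idx, params, private: ("b", []), None, None)
#   # -> container == ["a", "b"]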
12759 def _UpdateIvNames(base_index, disks):
12760 """Updates the C{iv_name} attribute of disks.
12762 @type disks: list of L{objects.Disk}
12765 for (idx, disk) in enumerate(disks):
12766 disk.iv_name = "disk/%s" % (base_index + idx, )
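
# Example: after removing disk/1 of a three-disk instance, the trailing disk
# is renumbered with _UpdateIvNames(1, disks[1:]), turning its iv_name from
# "disk/2" back into "disk/1".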
12769 class _InstNicModPrivate:
12770 """Data structure for network interface modifications.
12772 Used by L{LUInstanceSetParams}.
  def __init__(self):
    self.params = None
    self.filled = None
12780 class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.
12784 HPATH = "instance-modify"
12785 HTYPE = constants.HTYPE_INSTANCE

  @staticmethod
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      result = []

      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          result.append((constants.DDM_MODIFY, op, params))

      assert verify_fn(result)
    else:
      result = mods

    return result
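
  # Illustrative example (hypothetical values): legacy 2-tuple mods such as
  #   [(constants.DDM_ADD, {"size": 1024}), ("0", {"mode": "ro"})]
  # are upgraded by the method above to the 3-tuple form used everywhere
  # else:
  #   [(constants.DDM_ADD, -1, {"size": 1024}),
  #    (constants.DDM_MODIFY, "0", {"mode": "ro"})]
  # i.e. add/remove get index -1 (operate on the end of the container) and
  # any other first element is treated as "modify the item at that index".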

  @staticmethod
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    """
    for (op, _, params) in mods:
      assert ht.TDict(params)

      utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
    if op == constants.DDM_ADD:
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      try:
        size = int(size)
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      params[constants.IDISK_SIZE] = size

    elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
      raise errors.OpPrereqError("Disk size change not possible, use"
                                 " grow-disk", errors.ECODE_INVAL)

  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      req_net = params.get(constants.INIC_NETWORK, None)
      link = params.get(constants.NIC_LINK, None)
      mode = params.get(constants.NIC_MODE, None)
      if req_net is not None:
        if req_net.lower() == constants.VALUE_NONE:
          params[constants.INIC_NETWORK] = None
          req_net = None
        elif link is not None or mode is not None:
          raise errors.OpPrereqError("If network is given, mode or link"
                                     " should not be set",
                                     errors.ECODE_INVAL)

      if op == constants.DDM_ADD:
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if ip is not None:
        if ip.lower() == constants.VALUE_NONE:
          params[constants.INIC_IP] = None
        else:
          if ip.lower() == constants.NIC_IP_POOL:
            if op == constants.DDM_ADD and req_net is None:
              raise errors.OpPrereqError("If ip=pool, parameter network"
                                         " must be passed too",
                                         errors.ECODE_INVAL)
          else:
            if not netutils.IPAddress.IsValid(ip):
              raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                         errors.ECODE_INVAL)

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)
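
  # Illustrative parameter dicts (hypothetical values) as handled by the
  # verifier above for a NIC addition:
  #   {constants.INIC_IP: "pool", constants.INIC_NETWORK: "net1"}
  #     -> valid; the IP is later allocated from net1's address pool
  #   {constants.INIC_IP: "192.0.2.10"}
  #     -> valid; a literal address is syntax-checked with
  #        netutils.IPAddress.IsValid
  #   {constants.INIC_NETWORK: "net1", constants.NIC_LINK: "br0"}
  #     -> rejected; when a network is given, link/mode come from the
  #        network's netparams instead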

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.offline is not None or self.op.runtime_mem):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    self.op.disks = self._UpgradeDiskNicMods(
      "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
    self.op.nics = self._UpgradeDiskNicMods(
      "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)

    # Check disk modifications
    self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                    self._VerifyDiskModification)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # Check NIC modifications
    self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
                    self._VerifyNicModification)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODEGROUP] = []
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    # Lock the node group in order to look up the ipolicy
    self.share_locks[locking.LEVEL_NODEGROUP] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      # Acquire locks for the instance's nodegroups optimistically. Needs
      # to be verified in CheckPrereq
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = {}
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
        n = copy.deepcopy(nic)
        nicparams = self.cluster.SimpleFillNIC(n.nicparams)
        n.nicparams = nicparams
        nics.append(_NICToTuple(self, n))

      args["nics"] = nics

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def _PrepareNicModification(self, params, private, old_ip, old_net,
                              old_params, cluster, pnode):
    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])

    req_link = update_params_dict.get(constants.NIC_LINK, None)
    req_mode = update_params_dict.get(constants.NIC_MODE, None)

    new_net = params.get(constants.INIC_NETWORK, old_net)
    if new_net is not None:
      netparams = self.cfg.GetGroupNetParams(new_net, pnode)
      if netparams is None:
        raise errors.OpPrereqError("No netparams found for the network"
                                   " %s, probably not connected" % new_net,
                                   errors.ECODE_INVAL)
      new_params = dict(netparams)
    else:
      new_params = _GetUpdatedParams(old_params, update_params_dict)

    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

    new_filled_params = cluster.SimpleFillNIC(new_params)
    objects.NIC.CheckParameterSyntax(new_filled_params)

    new_mode = new_filled_params[constants.NIC_MODE]
    if new_mode == constants.NIC_MODE_BRIDGED:
      bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)

    elif new_mode == constants.NIC_MODE_OVS:
      # TODO: check OVS link
      self.LogInfo("OVS links are currently not checked for correctness")

    if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net, self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address '%s' already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)
    elif new_net != old_net:

      def get_net_prefix(net):
        if net:
          uuid = self.cfg.LookupNetwork(net)
          if uuid:
            nobj = self.cfg.GetNetwork(uuid)
            return nobj.mac_prefix
        return None

      new_prefix = get_net_prefix(new_net)
      old_prefix = get_net_prefix(old_net)
      if old_prefix != new_prefix:
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net, self.proc.GetECId())

    # if there is a change in nic-network configuration
    new_ip = params.get(constants.INIC_IP, old_ip)
    if (new_ip, new_net) != (old_ip, old_net):
      if new_ip:
        if new_net:
          if new_ip.lower() == constants.NIC_IP_POOL:
            try:
              new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("Unable to get a free IP"
                                         " from the address pool",
                                         errors.ECODE_STATE)
            self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
            params[constants.INIC_IP] = new_ip
          elif new_ip != old_ip or new_net != old_net:
            try:
              self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
              self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("IP %s not available in network %s" %
                                         (new_ip, new_net),
                                         errors.ECODE_NOTUNIQUE)
        elif new_ip.lower() == constants.NIC_IP_POOL:
          raise errors.OpPrereqError("ip=pool, but no network found",
                                     errors.ECODE_INVAL)
        else:
          # new network is None, so check for conflicts on the node
          if self.op.conflicts_check:
            _CheckForConflictingIp(self, new_ip, pnode)

      # release the old IP if the old network is set
      if old_ip and old_net:
        try:
          self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
        except errors.AddressPoolError:
          logging.warning("Release IP %s not contained in network %s",
                          old_ip, old_net)

    # there are no changes in (net, ip) tuple
    elif (old_net is not None and
          (req_link is not None or req_mode is not None)):
      raise errors.OpPrereqError("Not allowed to change link or mode of"
                                 " a NIC that is connected to a network",
                                 errors.ECODE_INVAL)

    private.params = new_params
    private.filled = new_filled_params

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode = instance.primary_node
    assert pnode in self.owned_locks(locking.LEVEL_NODE)
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetInstanceDiskParams(instance)

    #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
    assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
    group_info = self.cfg.GetNodeGroup(pnode_info.group)

    # dictionary with instance information after the modification
    ispec = {}

    # Prepare disk/NIC modifications
    self.diskmod = PrepareContainerMods(self.op.disks, None)
    self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    assert not (self.op.disk_template and self.op.disks), \
      "Can't modify disk template and apply disk changes at the same time"

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

        snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
        snode_group = self.cfg.GetNodeGroup(snode_info.group)
        ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                                snode_group)
        _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                                ignore=self.op.ignore_ipolicy)
        if pnode_info.group != snode_info.group:
          self.LogWarning("The primary and secondary nodes are in two"
                          " different node groups; the disk parameters"
                          " from the first disk's node group will be"
                          " used")

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new  # the new actual values
      self.hv_inst = i_hvdict  # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new  # the new actual values
      self.be_inst = i_bedict  # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)
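
    # Illustrative example (hypothetical values): with the hypervisor
    # cpu_mask set to "1-2:3:4-5", utils.ParseMultiCpuMask yields one entry
    # per vCPU, [[1, 2], [3], [4, 5]], so BE_VCPUS must be 3 here, and the
    # largest CPU number requested (5) means every node of the instance
    # needs at least 6 physical CPUs.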

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict  # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor])
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0
          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node)
      if not remote_info.payload:  # not running already
        raise errors.OpPrereqError("Instance %s is not running" %
                                   instance.name, errors.ECODE_STATE)

      current_memory = remote_info.payload["memory"]
      if (not self.op.force and
          (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
           self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
        raise errors.OpPrereqError("Instance %s must have memory between %d"
                                   " and %d MB of memory unless --force is"
                                   " given" %
                                   (instance.name,
                                    self.be_proposed[constants.BE_MINMEM],
                                    self.be_proposed[constants.BE_MAXMEM]),
                                   errors.ECODE_INVAL)

      delta = self.op.runtime_mem - current_memory
      if delta > 0:
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "ballooning memory for instance %s" %
                             instance.name, delta, instance.hypervisor)

    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances", errors.ECODE_INVAL)

    def _PrepareNicCreate(_, params, private):
      self._PrepareNicModification(params, private, None, None,
                                   {}, cluster, pnode)
      return (None, None)

    def _PrepareNicMod(_, nic, params, private):
      self._PrepareNicModification(params, private, nic.ip, nic.network,
                                   nic.nicparams, cluster, pnode)
      return None

    def _PrepareNicRemove(_, params, __):
      ip = params.ip
      net = params.network
      if net is not None and ip is not None:
        self.cfg.ReleaseIp(net, ip, self.proc.GetECId())

    # Verify NIC changes (operating on copy)
    nics = instance.nics[:]
    ApplyContainerMods("NIC", nics, None, self.nicmod,
                       _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
    if len(nics) > constants.MAX_NICS:
      raise errors.OpPrereqError("Instance has too many network interfaces"
                                 " (%d), cannot add more" % constants.MAX_NICS,
                                 errors.ECODE_STATE)

    # Verify disk changes (operating on a copy)
    disks = instance.disks[:]
    ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
    if len(disks) > constants.MAX_DISKS:
      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
                                 " more" % constants.MAX_DISKS,
                                 errors.ECODE_STATE)
    disk_sizes = [disk.size for disk in instance.disks]
    disk_sizes.extend(params["size"] for (op, idx, params, private) in
                      self.diskmod if op == constants.DDM_ADD)
    ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
    ispec[constants.ISPEC_DISK_SIZE] = disk_sizes

    if self.op.offline is not None:
      if self.op.offline:
        msg = "can't change to offline"
      else:
        msg = "can't change to online"
      _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)

    # Pre-compute NIC changes (necessary to use result in hooks)
    self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      self._new_nics = nics
      ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
    else:
      self._new_nics = None
      ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)

    if not self.op.ignore_ipolicy:
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)

      # Fill ispec with backend parameters
      ispec[constants.ISPEC_SPINDLE_USE] = \
        self.be_new.get(constants.BE_SPINDLE_USE, None)
      ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
                                                         None)

      # Copy ispec to verify parameters with min/max values separately
      ispec_max = ispec.copy()
      ispec_max[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MAXMEM, None)
      res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
      ispec_min = ispec.copy()
      ispec_min[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MINMEM, None)
      res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)

      if (res_max or res_min):
        # FIXME: Improve error message by including information about whether
        # the upper or lower limit of the parameter fails the ipolicy.
        msg = ("Instance allocation to group %s (%s) violates policy: %s" %
               (group_info, group_info.name,
                utils.CommaJoin(set(res_max + res_min))))
        raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
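
  # Illustrative ispec contents (hypothetical values) as checked against the
  # group's instance policy above; memory is validated twice, once with
  # BE_MINMEM and once with BE_MAXMEM substituted for ISPEC_MEM_SIZE:
  #   {constants.ISPEC_DISK_COUNT: 2,
  #    constants.ISPEC_DISK_SIZE: [10240, 2048],
  #    constants.ISPEC_NIC_COUNT: 1,
  #    constants.ISPEC_CPU_COUNT: 4,
  #    constants.ISPEC_SPINDLE_USE: 1,
  #    constants.ISPEC_MEM_SIZE: 1024}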

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in anno_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in anno_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
    new_disks = [d.children[0] for d in instance.disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # this is a DRBD disk, return its port to the pool
    # NOTE: this must be done right before the call to cfg.Update!
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def _CreateNewDisk(self, idx, params, _):
    """Creates a new disk.

    """
    instance = self.instance

    # add a new disk
    if instance.disk_template in constants.DTS_FILEBASED:
      (file_driver, file_path) = instance.disks[0].logical_id
      file_path = os.path.dirname(file_path)
    else:
      file_driver = file_path = None

    disk = \
      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
                            instance.primary_node, instance.secondary_nodes,
                            [params], file_path, file_driver, idx,
                            self.Log, self.diskparams)[0]

    info = _GetInstanceInfoText(instance)

    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)
    # Note: this needs to be kept in sync with _CreateDisks
    #HARDCODE
    for node in instance.all_nodes:
      f_create = (node == instance.primary_node)
      try:
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
      except errors.OpExecError, err:
        self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
                        disk.iv_name, disk, node, err)

    return (disk, [
      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
      ])

  @staticmethod
  def _ModifyDisk(idx, disk, params, _):
    """Modifies a disk.

    """
    disk.mode = params[constants.IDISK_MODE]

    return [
      ("disk.mode/%d" % idx, disk.mode),
      ]

  def _RemoveDisk(self, idx, root, _):
    """Removes a disk.

    """
    (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
    for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
      self.cfg.SetDiskID(disk, node)
      msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove disk/%d on node '%s': %s,"
                        " continuing anyway", idx, node, msg)

    # if this is a DRBD disk, return its port to the pool
    if root.dev_type in constants.LDS_DRBD:
      self.cfg.AddTcpUdpPort(root.logical_id[2])

  @staticmethod
  def _CreateNewNic(idx, params, private):
    """Creates data structure for a new network interface.

    """
    mac = params[constants.INIC_MAC]
    ip = params.get(constants.INIC_IP, None)
    net = params.get(constants.INIC_NETWORK, None)
    #TODO: not private.filled?? can a nic have no nicparams??
    nicparams = private.filled

    return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
      ("nic.%d" % idx,
       "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
       (mac, ip, private.filled[constants.NIC_MODE],
        private.filled[constants.NIC_LINK],
        net)),
      ])

  @staticmethod
  def _ApplyNicMods(idx, nic, params, private):
    """Modifies a network interface.

    """
    changes = []

    for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
      if key in params:
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
        setattr(nic, key, params[key])

    if private.filled:
      nic.nicparams = private.filled

      for (key, val) in nic.nicparams.items():
        changes.append(("nic.%s/%d" % (key, idx), val))

    return changes

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    self.cfg.Update(instance, feedback_fn, self.proc.GetECId())

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
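
  # Dispatch note (illustrative): Exec looks up the conversion function by
  # the (old, new) template pair, e.g.
  #   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
  #   self._DISK_CONVERSIONS[mode](self, feedback_fn)
  # The dictionary stores the plain functions, not bound methods, hence the
  # explicit self argument; supporting a new conversion means adding a method
  # with the same signature and registering it here.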


class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE

  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=list(self.target_uuids))
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info), errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
                             ["node", "export"], self.op.use_locking)

  def ExpandNames(self):
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = {}

    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is None:
        result[node] = False
      else:
        result.setdefault(node, []).append(expname)

    return result
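
# Illustrative shape of the old-style result built above (hypothetical node
# names):
#   {"node1.example.com": ["instance1.example.com", "instance2.example.com"],
#    "node2.example.com": False}
# A list holds the export names found on a node, while False marks a node
# whose export list could not be retrieved.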


class _ExportQuery(_QueryBase):
  FIELDS = query.EXPORT_FIELDS

  #: The node name is not a unique key for this query
  SORT_FIELD = "node"

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

    if self.do_locking:
      lu.share_locks = _ShareAll()
      lu.needed_locks = {
        locking.LEVEL_NODE: self.wanted,
        }

      if self.names is None:
        lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)

    result = []

    for (node, nres) in lu.rpc.call_export_list(nodes).items():
      if nres.fail_msg:
        result.append((node, None))
      else:
        result.extend((node, expname) for expname in nres.payload)

    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know
      # where the previous export might be, and in this LU we search for it
      # and remove it from its current node. In the future we could fix this
      # by:
      #  - making a tasklet to search (share-lock all), then create the new
      #    one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

      # Allocations should be stopped while this LU runs with node locks, but
      # it doesn't have to be exclusive
      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before", errors.ECODE_STATE)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
                                   errors.ECODE_INVAL)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (instance.admin_state != constants.ADMINST_UP)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and
            instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
        "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
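
  # Illustrative return value of the method above (hypothetical values):
  #   (True, [True, True])
  # i.e. a boolean for the overall export finalization plus one boolean per
  # instance disk; the assertions before the finally-block guarantee that
  # dresults has exactly this per-disk boolean shape.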


class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      # We need all nodes to be locked in order for RemoveExport to work, but
      # we don't need to lock the instance itself, as nothing will happen to it
      # (and we can remove exports also for a removed instance)
      locking.LEVEL_NODE: locking.ALL_SET,

      # Removing backups is quick, so blocking allocations is justified
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    # Allocations should be stopped while this LU runs with node locks, but it
    # doesn't have to be exclusive
    self.share_locks[locking.LEVEL_NODE_ALLOC] = 1

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
14437 class LUGroupAdd(LogicalUnit):
14438 """Logical unit for creating node groups.
14441 HPATH = "group-add"
14442 HTYPE = constants.HTYPE_GROUP
14445 def ExpandNames(self):
14446 # We need the new group's UUID here so that we can create and acquire the
14447 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14448 # that it should not check whether the UUID exists in the configuration.
14449 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14450 self.needed_locks = {}
14451 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14453 def CheckPrereq(self):
14454 """Check prerequisites.
14456 This checks that the given group name is not an existing node group
14457 already.
14459 """
14460 try:
14461 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14462 except errors.OpPrereqError:
14463 pass
14464 else:
14465 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14466 " node group (UUID: %s)" %
14467 (self.op.group_name, existing_uuid),
14468 errors.ECODE_EXISTS)
14470 if self.op.ndparams:
14471 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14473 if self.op.hv_state:
14474 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14475 else:
14476 self.new_hv_state = None
14478 if self.op.disk_state:
14479 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14480 else:
14481 self.new_disk_state = None
14483 if self.op.diskparams:
14484 for templ in constants.DISK_TEMPLATES:
14485 if templ in self.op.diskparams:
14486 utils.ForceDictType(self.op.diskparams[templ],
14487 constants.DISK_DT_TYPES)
14488 self.new_diskparams = self.op.diskparams
14489 try:
14490 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14491 except errors.OpPrereqError, err:
14492 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14493 errors.ECODE_INVAL)
14494 else:
14495 self.new_diskparams = {}
14497 if self.op.ipolicy:
14498 cluster = self.cfg.GetClusterInfo()
14499 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14500 try:
14501 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14502 except errors.ConfigurationError, err:
14503 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14504 errors.ECODE_INVAL)
14506 def BuildHooksEnv(self):
14507 """Build hooks env.
14509 """
14510 return {
14511 "GROUP_NAME": self.op.group_name,
14512 }
14514 def BuildHooksNodes(self):
14515 """Build hooks nodes.
14517 """
14518 mn = self.cfg.GetMasterNode()
14519 return ([mn], [mn])
14521 def Exec(self, feedback_fn):
14522 """Add the node group to the cluster.
14524 """
14525 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14526 uuid=self.group_uuid,
14527 alloc_policy=self.op.alloc_policy,
14528 ndparams=self.op.ndparams,
14529 diskparams=self.new_diskparams,
14530 ipolicy=self.op.ipolicy,
14531 hv_state_static=self.new_hv_state,
14532 disk_state_static=self.new_disk_state)
14534 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14535 del self.remove_locks[locking.LEVEL_NODEGROUP]
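# A minimal usage sketch (assumptions: "cl" is an existing job-queue client
# and the opcode fields shown exist on OpGroupAdd): LUGroupAdd is normally
# reached by submitting an OpGroupAdd opcode through the job queue, which is
# what "gnt-group add" does under the hood.
def _ExampleSubmitGroupAdd(cl, group_name):
  """Submits a group-creation job and returns the job ID."""
  op = opcodes.OpGroupAdd(group_name=group_name,
                          alloc_policy=constants.ALLOC_POLICY_PREFERRED)
  return cl.SubmitJob([op])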
14538 class LUGroupAssignNodes(NoHooksLU):
14539 """Logical unit for assigning nodes to groups.
14541 """
14543 REQ_BGL = False
14544 def ExpandNames(self):
14545 # These raise errors.OpPrereqError on their own:
14546 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14547 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14549 # We want to lock all the affected nodes and groups. We have readily
14550 # available the list of nodes, and the *destination* group. To gather the
14551 # list of "source" groups, we need to fetch node information later on.
14552 self.needed_locks = {
14553 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14554 locking.LEVEL_NODE: self.op.nodes,
14555 }
14557 def DeclareLocks(self, level):
14558 if level == locking.LEVEL_NODEGROUP:
14559 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14561 # Try to get all affected nodes' groups without having the group or node
14562 # lock yet. Needs verification later in the code flow.
14563 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14565 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14567 def CheckPrereq(self):
14568 """Check prerequisites.
14570 """
14571 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14572 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14573 frozenset(self.op.nodes))
14575 expected_locks = (set([self.group_uuid]) |
14576 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14577 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14578 if actual_locks != expected_locks:
14579 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14580 " current groups are '%s', used to be '%s'" %
14581 (utils.CommaJoin(expected_locks),
14582 utils.CommaJoin(actual_locks)))
14584 self.node_data = self.cfg.GetAllNodesInfo()
14585 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14586 instance_data = self.cfg.GetAllInstancesInfo()
14588 if self.group is None:
14589 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14590 (self.op.group_name, self.group_uuid))
14592 (new_splits, previous_splits) = \
14593 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14594 for node in self.op.nodes],
14595 self.node_data, instance_data)
14597 if new_splits:
14598 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14600 if not self.op.force:
14601 raise errors.OpExecError("The following instances get split by this"
14602 " change and --force was not given: %s" %
14603 fmt_new_splits)
14604 else:
14605 self.LogWarning("This operation will split the following instances: %s",
14606 fmt_new_splits)
14608 if previous_splits:
14609 self.LogWarning("In addition, these already-split instances continue"
14610 " to be split across groups: %s",
14611 utils.CommaJoin(utils.NiceSort(previous_splits)))
14613 def Exec(self, feedback_fn):
14614 """Assign nodes to a new group.
14616 """
14617 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14619 self.cfg.AssignGroupNodes(mods)
14621 @staticmethod
14622 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14623 """Check for split instances after a node assignment.
14625 This method considers a series of node assignments as an atomic operation,
14626 and returns information about split instances after applying the set of
14627 changes.
14629 In particular, it returns information about newly split instances, and
14630 instances that were already split, and remain so after the change.
14632 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14633 considered.
14635 @type changes: list of (node_name, new_group_uuid) pairs.
14636 @param changes: list of node assignments to consider.
14637 @param node_data: a dict with data for all nodes
14638 @param instance_data: a dict with all instances to consider
14639 @rtype: a two-tuple
14640 @return: a list of instances that were previously okay and become split as a
14641 consequence of this change, and a list of instances that were previously
14642 split and this change does not fix.
14643 """
14645 changed_nodes = dict((node, group) for node, group in changes
14646 if node_data[node].group != group)
14648 all_split_instances = set()
14649 previously_split_instances = set()
14651 def InstanceNodes(instance):
14652 return [instance.primary_node] + list(instance.secondary_nodes)
14654 for inst in instance_data.values():
14655 if inst.disk_template not in constants.DTS_INT_MIRROR:
14656 continue
14658 instance_nodes = InstanceNodes(inst)
14660 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14661 previously_split_instances.add(inst.name)
14663 if len(set(changed_nodes.get(node, node_data[node].group)
14664 for node in instance_nodes)) > 1:
14665 all_split_instances.add(inst.name)
14667 return (list(all_split_instances - previously_split_instances),
14668 list(previously_split_instances & all_split_instances))
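# Worked example with hypothetical data (not part of the original module):
# one DRBD instance on nodes "n1"/"n2", both currently in group "g1". Moving
# "n2" to "g2" makes the instance newly split, so the first returned list
# contains it and the second one stays empty.
def _ExampleSplitCheck():
  import collections
  FakeNode = collections.namedtuple("FakeNode", ["group"])
  FakeInstance = collections.namedtuple(
    "FakeInstance", ["name", "disk_template", "primary_node",
                     "secondary_nodes"])
  node_data = {"n1": FakeNode(group="g1"), "n2": FakeNode(group="g1")}
  instance_data = {
    "inst1": FakeInstance(name="inst1", disk_template=constants.DT_DRBD8,
                          primary_node="n1", secondary_nodes=["n2"]),
    }
  # Returns (["inst1"], []): newly split, nothing previously split
  return LUGroupAssignNodes.CheckAssignmentForSplitInstances(
    [("n2", "g2")], node_data, instance_data)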
14671 class _GroupQuery(_QueryBase):
14672 FIELDS = query.GROUP_FIELDS
14674 def ExpandNames(self, lu):
14675 lu.needed_locks = {}
14677 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14678 self._cluster = lu.cfg.GetClusterInfo()
14679 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14681 if not self.names:
14682 self.wanted = [name_to_uuid[name]
14683 for name in utils.NiceSort(name_to_uuid.keys())]
14684 else:
14685 # Accept names to be either names or UUIDs.
14686 missing = []
14687 self.wanted = []
14688 all_uuid = frozenset(self._all_groups.keys())
14690 for name in self.names:
14691 if name in all_uuid:
14692 self.wanted.append(name)
14693 elif name in name_to_uuid:
14694 self.wanted.append(name_to_uuid[name])
14695 else:
14696 missing.append(name)
14698 if missing:
14699 raise errors.OpPrereqError("Some groups do not exist: %s" %
14700 utils.CommaJoin(missing),
14701 errors.ECODE_NOENT)
14703 def DeclareLocks(self, lu, level):
14704 pass
14706 def _GetQueryData(self, lu):
14707 """Computes the list of node groups and their attributes.
14709 """
14710 do_nodes = query.GQ_NODE in self.requested_data
14711 do_instances = query.GQ_INST in self.requested_data
14713 group_to_nodes = None
14714 group_to_instances = None
14716 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14717 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14718 # latter GetAllInstancesInfo() is not enough, for we have to go through
14719 # instance->node. Hence, we will need to process nodes even if we only need
14720 # instance information.
14721 if do_nodes or do_instances:
14722 all_nodes = lu.cfg.GetAllNodesInfo()
14723 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14724 node_to_group = {}
14726 for node in all_nodes.values():
14727 if node.group in group_to_nodes:
14728 group_to_nodes[node.group].append(node.name)
14729 node_to_group[node.name] = node.group
14731 if do_instances:
14732 all_instances = lu.cfg.GetAllInstancesInfo()
14733 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14735 for instance in all_instances.values():
14736 node = instance.primary_node
14737 if node in node_to_group:
14738 group_to_instances[node_to_group[node]].append(instance.name)
14740 if not do_nodes:
14741 # Do not pass on node information if it was not requested.
14742 group_to_nodes = None
14744 return query.GroupQueryData(self._cluster,
14745 [self._all_groups[uuid]
14746 for uuid in self.wanted],
14747 group_to_nodes, group_to_instances,
14748 query.GQ_DISKPARAMS in self.requested_data)
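# A standalone sketch of the two mappings built above (hypothetical data):
# nodes are first bucketed by group while recording a node->group reverse
# map, which is then used to attribute each instance, via its primary node,
# to a group.
def _ExampleGroupMappings():
  nodes = {"node1": "uuid-g1", "node2": "uuid-g1", "node3": "uuid-g2"}
  instances = {"inst1": "node1", "inst2": "node3"}  # name -> primary node
  group_to_nodes = dict((g, []) for g in set(nodes.values()))
  node_to_group = {}
  for name, group in nodes.items():
    group_to_nodes[group].append(name)
    node_to_group[name] = group
  group_to_instances = dict((g, []) for g in group_to_nodes)
  for inst, pnode in instances.items():
    group_to_instances[node_to_group[pnode]].append(inst)
  return (group_to_nodes, group_to_instances)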
14751 class LUGroupQuery(NoHooksLU):
14752 """Logical unit for querying node groups.
14754 """
14755 REQ_BGL = False
14757 def CheckArguments(self):
14758 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14759 self.op.output_fields, False)
14761 def ExpandNames(self):
14762 self.gq.ExpandNames(self)
14764 def DeclareLocks(self, level):
14765 self.gq.DeclareLocks(self, level)
14767 def Exec(self, feedback_fn):
14768 return self.gq.OldStyleQuery(self)
14771 class LUGroupSetParams(LogicalUnit):
14772 """Modifies the parameters of a node group.
14774 """
14775 HPATH = "group-modify"
14776 HTYPE = constants.HTYPE_GROUP
14777 REQ_BGL = False
14779 def CheckArguments(self):
14780 all_changes = [
14781 self.op.ndparams,
14782 self.op.diskparams,
14783 self.op.alloc_policy,
14784 self.op.hv_state,
14785 self.op.disk_state,
14786 self.op.ipolicy,
14787 ]
14789 if all_changes.count(None) == len(all_changes):
14790 raise errors.OpPrereqError("Please pass at least one modification",
14791 errors.ECODE_INVAL)
14793 def ExpandNames(self):
14794 # This raises errors.OpPrereqError on its own:
14795 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14797 self.needed_locks = {
14798 locking.LEVEL_INSTANCE: [],
14799 locking.LEVEL_NODEGROUP: [self.group_uuid],
14800 }
14802 self.share_locks[locking.LEVEL_INSTANCE] = 1
14804 def DeclareLocks(self, level):
14805 if level == locking.LEVEL_INSTANCE:
14806 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14808 # Lock instances optimistically, needs verification once group lock has
14809 # been acquired
14810 self.needed_locks[locking.LEVEL_INSTANCE] = \
14811 self.cfg.GetNodeGroupInstances(self.group_uuid)
14813 @staticmethod
14814 def _UpdateAndVerifyDiskParams(old, new):
14815 """Updates and verifies disk parameters.
14817 """
14818 new_params = _GetUpdatedParams(old, new)
14819 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14820 return new_params
14822 def CheckPrereq(self):
14823 """Check prerequisites.
14825 """
14826 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14828 # Check if locked instances are still correct
14829 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14831 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14832 cluster = self.cfg.GetClusterInfo()
14834 if self.group is None:
14835 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14836 (self.op.group_name, self.group_uuid))
14838 if self.op.ndparams:
14839 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14840 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14841 self.new_ndparams = new_ndparams
14843 if self.op.diskparams:
14844 diskparams = self.group.diskparams
14845 uavdp = self._UpdateAndVerifyDiskParams
14846 # For each disktemplate subdict update and verify the values
14847 new_diskparams = dict((dt,
14848 uavdp(diskparams.get(dt, {}),
14849 self.op.diskparams[dt]))
14850 for dt in constants.DISK_TEMPLATES
14851 if dt in self.op.diskparams)
14852 # As we've got all subdicts of diskparams ready, let's merge the actual
14853 # dict with all updated subdicts
14854 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14855 try:
14856 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14857 except errors.OpPrereqError, err:
14858 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14859 errors.ECODE_INVAL)
14861 if self.op.hv_state:
14862 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14863 self.group.hv_state_static)
14865 if self.op.disk_state:
14866 self.new_disk_state = \
14867 _MergeAndVerifyDiskState(self.op.disk_state,
14868 self.group.disk_state_static)
14870 if self.op.ipolicy:
14871 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14872 self.op.ipolicy,
14873 group_policy=True)
14875 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14876 inst_filter = lambda inst: inst.name in owned_instances
14877 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14878 gmi = ganeti.masterd.instance
14879 violations = \
14880 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14881 self.group),
14882 new_ipolicy, instances)
14884 if violations:
14885 self.LogWarning("After the ipolicy change the following instances"
14886 " violate them: %s",
14887 utils.CommaJoin(violations))
14889 def BuildHooksEnv(self):
14890 """Build hooks env.
14892 """
14893 return {
14894 "GROUP_NAME": self.op.group_name,
14895 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14896 }
14898 def BuildHooksNodes(self):
14899 """Build hooks nodes.
14901 """
14902 mn = self.cfg.GetMasterNode()
14903 return ([mn], [mn])
14905 def Exec(self, feedback_fn):
14906 """Modifies the node group.
14908 """
14909 result = []
14911 if self.op.ndparams:
14912 self.group.ndparams = self.new_ndparams
14913 result.append(("ndparams", str(self.group.ndparams)))
14915 if self.op.diskparams:
14916 self.group.diskparams = self.new_diskparams
14917 result.append(("diskparams", str(self.group.diskparams)))
14919 if self.op.alloc_policy:
14920 self.group.alloc_policy = self.op.alloc_policy
14922 if self.op.hv_state:
14923 self.group.hv_state_static = self.new_hv_state
14925 if self.op.disk_state:
14926 self.group.disk_state_static = self.new_disk_state
14928 if self.op.ipolicy:
14929 self.group.ipolicy = self.new_ipolicy
14931 self.cfg.Update(self.group, feedback_fn)
14933 return result
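# The list returned above feeds the standard "parameter changed" output: each
# (name, new_value) pair is rendered by the CLI, e.g. (hypothetical values):
#   [("ndparams", "{'oob_program': None}"), ("alloc_policy", "last_resort")]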
14935 class LUGroupRemove(LogicalUnit):
14936 HPATH = "group-remove"
14937 HTYPE = constants.HTYPE_GROUP
14938 REQ_BGL = False
14940 def ExpandNames(self):
14941 # This raises errors.OpPrereqError on its own:
14942 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14943 self.needed_locks = {
14944 locking.LEVEL_NODEGROUP: [self.group_uuid],
14945 }
14947 def CheckPrereq(self):
14948 """Check prerequisites.
14950 This checks that the given group name exists as a node group, that it is
14951 empty (i.e., contains no nodes), and that it is not the last group of the
14952 cluster.
14954 """
14955 # Verify that the group is empty.
14956 group_nodes = [node.name
14957 for node in self.cfg.GetAllNodesInfo().values()
14958 if node.group == self.group_uuid]
14960 if group_nodes:
14961 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14962 " nodes: %s" %
14963 (self.op.group_name,
14964 utils.CommaJoin(utils.NiceSort(group_nodes))),
14965 errors.ECODE_STATE)
14967 # Verify the cluster would not be left group-less.
14968 if len(self.cfg.GetNodeGroupList()) == 1:
14969 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14970 " removed" % self.op.group_name,
14971 errors.ECODE_STATE)
14973 def BuildHooksEnv(self):
14974 """Build hooks env.
14976 """
14977 return {
14978 "GROUP_NAME": self.op.group_name,
14979 }
14981 def BuildHooksNodes(self):
14982 """Build hooks nodes.
14984 """
14985 mn = self.cfg.GetMasterNode()
14986 return ([mn], [mn])
14988 def Exec(self, feedback_fn):
14989 """Remove the node group.
14991 """
14992 try:
14993 self.cfg.RemoveNodeGroup(self.group_uuid)
14994 except errors.ConfigurationError:
14995 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14996 (self.op.group_name, self.group_uuid))
14998 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15001 class LUGroupRename(LogicalUnit):
15002 HPATH = "group-rename"
15003 HTYPE = constants.HTYPE_GROUP
15004 REQ_BGL = False
15006 def ExpandNames(self):
15007 # This raises errors.OpPrereqError on its own:
15008 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15010 self.needed_locks = {
15011 locking.LEVEL_NODEGROUP: [self.group_uuid],
15012 }
15014 def CheckPrereq(self):
15015 """Check prerequisites.
15017 Ensures requested new name is not yet used.
15019 """
15020 try:
15021 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15022 except errors.OpPrereqError:
15023 pass
15024 else:
15025 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15026 " node group (UUID: %s)" %
15027 (self.op.new_name, new_name_uuid),
15028 errors.ECODE_EXISTS)
15030 def BuildHooksEnv(self):
15031 """Build hooks env.
15033 """
15034 return {
15035 "OLD_NAME": self.op.group_name,
15036 "NEW_NAME": self.op.new_name,
15037 }
15039 def BuildHooksNodes(self):
15040 """Build hooks nodes.
15042 """
15043 mn = self.cfg.GetMasterNode()
15045 all_nodes = self.cfg.GetAllNodesInfo()
15046 all_nodes.pop(mn, None)
15048 run_nodes = [mn]
15049 run_nodes.extend(node.name for node in all_nodes.values()
15050 if node.group == self.group_uuid)
15052 return (run_nodes, run_nodes)
15054 def Exec(self, feedback_fn):
15055 """Rename the node group.
15057 """
15058 group = self.cfg.GetNodeGroup(self.group_uuid)
15060 if group is None:
15061 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15062 (self.op.group_name, self.group_uuid))
15064 group.name = self.op.new_name
15065 self.cfg.Update(group, feedback_fn)
15067 return self.op.new_name
15070 class LUGroupEvacuate(LogicalUnit):
15071 HPATH = "group-evacuate"
15072 HTYPE = constants.HTYPE_GROUP
15073 REQ_BGL = False
15075 def ExpandNames(self):
15076 # This raises errors.OpPrereqError on its own:
15077 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15079 if self.op.target_groups:
15080 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15081 self.op.target_groups)
15083 self.req_target_uuids = []
15085 if self.group_uuid in self.req_target_uuids:
15086 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15087 " as a target group (targets are %s)" %
15088 (self.group_uuid,
15089 utils.CommaJoin(self.req_target_uuids)),
15090 errors.ECODE_INVAL)
15092 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15094 self.share_locks = _ShareAll()
15095 self.needed_locks = {
15096 locking.LEVEL_INSTANCE: [],
15097 locking.LEVEL_NODEGROUP: [],
15098 locking.LEVEL_NODE: [],
15099 }
15101 def DeclareLocks(self, level):
15102 if level == locking.LEVEL_INSTANCE:
15103 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15105 # Lock instances optimistically, needs verification once node and group
15106 # locks have been acquired
15107 self.needed_locks[locking.LEVEL_INSTANCE] = \
15108 self.cfg.GetNodeGroupInstances(self.group_uuid)
15110 elif level == locking.LEVEL_NODEGROUP:
15111 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15113 if self.req_target_uuids:
15114 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15116 # Lock all groups used by instances optimistically; this requires going
15117 # via the node before it's locked, requiring verification later on
15118 lock_groups.update(group_uuid
15119 for instance_name in
15120 self.owned_locks(locking.LEVEL_INSTANCE)
15121 for group_uuid in
15122 self.cfg.GetInstanceNodeGroups(instance_name))
15123 else:
15124 # No target groups, need to lock all of them
15125 lock_groups = locking.ALL_SET
15127 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15129 elif level == locking.LEVEL_NODE:
15130 # This will only lock the nodes in the group to be evacuated which
15131 # contain actual instances
15132 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15133 self._LockInstancesNodes()
15135 # Lock all nodes in group to be evacuated and target groups
15136 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15137 assert self.group_uuid in owned_groups
15138 member_nodes = [node_name
15139 for group in owned_groups
15140 for node_name in self.cfg.GetNodeGroup(group).members]
15141 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15143 def CheckPrereq(self):
15144 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15145 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15146 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15148 assert owned_groups.issuperset(self.req_target_uuids)
15149 assert self.group_uuid in owned_groups
15151 # Check if locked instances are still correct
15152 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15154 # Get instance information
15155 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15157 # Check if node groups for locked instances are still correct
15158 _CheckInstancesNodeGroups(self.cfg, self.instances,
15159 owned_groups, owned_nodes, self.group_uuid)
15161 if self.req_target_uuids:
15162 # User requested specific target groups
15163 self.target_uuids = self.req_target_uuids
15164 else:
15165 # All groups except the one to be evacuated are potential targets
15166 self.target_uuids = [group_uuid for group_uuid in owned_groups
15167 if group_uuid != self.group_uuid]
15169 if not self.target_uuids:
15170 raise errors.OpPrereqError("There are no possible target groups",
15171 errors.ECODE_INVAL)
15173 def BuildHooksEnv(self):
15174 """Build hooks env.
15176 """
15177 return {
15178 "GROUP_NAME": self.op.group_name,
15179 "TARGET_GROUPS": " ".join(self.target_uuids),
15180 }
15182 def BuildHooksNodes(self):
15183 """Build hooks nodes.
15185 """
15186 mn = self.cfg.GetMasterNode()
15188 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15190 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15192 return (run_nodes, run_nodes)
15194 def Exec(self, feedback_fn):
15195 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15197 assert self.group_uuid not in self.target_uuids
15199 req = iallocator.IAReqGroupChange(instances=instances,
15200 target_groups=self.target_uuids)
15201 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15203 ial.Run(self.op.iallocator)
15205 if not ial.success:
15206 raise errors.OpPrereqError("Can't compute group evacuation using"
15207 " iallocator '%s': %s" %
15208 (self.op.iallocator, ial.info),
15209 errors.ECODE_NORES)
15211 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15213 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15214 len(jobs), self.op.group_name)
15216 return ResultWithJobs(jobs)
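# Hedged sketch of the "jobs" value handed to ResultWithJobs above: a list of
# job definitions, each itself a list of opcodes that run sequentially within
# one job. The opcode choice below is hypothetical; the iallocator decides
# the real evacuation steps.
def _ExampleEvacJobs(instance_names):
  return [[opcodes.OpInstanceMigrate(instance_name=name)]
          for name in instance_names]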
15219 class TagsLU(NoHooksLU): # pylint: disable=W0223
15220 """Generic tags LU.
15222 This is an abstract class which is the parent of all the other tags LUs.
15224 """
15225 def ExpandNames(self):
15226 self.group_uuid = None
15227 self.needed_locks = {}
15229 if self.op.kind == constants.TAG_NODE:
15230 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15231 lock_level = locking.LEVEL_NODE
15232 lock_name = self.op.name
15233 elif self.op.kind == constants.TAG_INSTANCE:
15234 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15235 lock_level = locking.LEVEL_INSTANCE
15236 lock_name = self.op.name
15237 elif self.op.kind == constants.TAG_NODEGROUP:
15238 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15239 lock_level = locking.LEVEL_NODEGROUP
15240 lock_name = self.group_uuid
15241 elif self.op.kind == constants.TAG_NETWORK:
15242 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15243 lock_level = locking.LEVEL_NETWORK
15244 lock_name = self.network_uuid
15245 else:
15246 lock_level = None
15247 lock_name = None
15249 if lock_level and getattr(self.op, "use_locking", True):
15250 self.needed_locks[lock_level] = lock_name
15252 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15253 # not possible to acquire the BGL based on opcode parameters)
15255 def CheckPrereq(self):
15256 """Check prerequisites.
15258 """
15259 if self.op.kind == constants.TAG_CLUSTER:
15260 self.target = self.cfg.GetClusterInfo()
15261 elif self.op.kind == constants.TAG_NODE:
15262 self.target = self.cfg.GetNodeInfo(self.op.name)
15263 elif self.op.kind == constants.TAG_INSTANCE:
15264 self.target = self.cfg.GetInstanceInfo(self.op.name)
15265 elif self.op.kind == constants.TAG_NODEGROUP:
15266 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15267 elif self.op.kind == constants.TAG_NETWORK:
15268 self.target = self.cfg.GetNetwork(self.network_uuid)
15269 else:
15270 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15271 str(self.op.kind), errors.ECODE_INVAL)
15274 class LUTagsGet(TagsLU):
15275 """Returns the tags of a given object.
15277 """
15278 REQ_BGL = False
15280 def ExpandNames(self):
15281 TagsLU.ExpandNames(self)
15283 # Share locks as this is only a read operation
15284 self.share_locks = _ShareAll()
15286 def Exec(self, feedback_fn):
15287 """Returns the tag list.
15289 """
15290 return list(self.target.GetTags())
15293 class LUTagsSearch(NoHooksLU):
15294 """Searches the tags for a given pattern.
15296 """
15297 REQ_BGL = False
15299 def ExpandNames(self):
15300 self.needed_locks = {}
15302 def CheckPrereq(self):
15303 """Check prerequisites.
15305 This checks the pattern passed for validity by compiling it.
15307 """
15308 try:
15309 self.re = re.compile(self.op.pattern)
15310 except re.error, err:
15311 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15312 (self.op.pattern, err), errors.ECODE_INVAL)
15314 def Exec(self, feedback_fn):
15315 """Returns the tag list.
15317 """
15318 cfg = self.cfg
15319 tgts = [("/cluster", cfg.GetClusterInfo())]
15320 ilist = cfg.GetAllInstancesInfo().values()
15321 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15322 nlist = cfg.GetAllNodesInfo().values()
15323 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15324 tgts.extend(("/nodegroup/%s" % n.name, n)
15325 for n in cfg.GetAllNodeGroupsInfo().values())
15326 results = []
15327 for path, target in tgts:
15328 for tag in target.GetTags():
15329 if self.re.search(tag):
15330 results.append((path, tag))
15331 return results
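# Usage sketch (hypothetical names): a search for the pattern "^web" could
# return [("/instances/inst1.example.com", "webserver")], i.e. pairs of the
# config path of the tagged object and the matching tag itself.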
15334 class LUTagsSet(TagsLU):
15335 """Sets a tag on a given object.
15337 """
15338 REQ_BGL = False
15340 def CheckPrereq(self):
15341 """Check prerequisites.
15343 This checks the type and length of the tag name and value.
15345 """
15346 TagsLU.CheckPrereq(self)
15347 for tag in self.op.tags:
15348 objects.TaggableObject.ValidateTag(tag)
15350 def Exec(self, feedback_fn):
15351 """Sets the tag.
15353 """
15354 try:
15355 for tag in self.op.tags:
15356 self.target.AddTag(tag)
15357 except errors.TagError, err:
15358 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15359 self.cfg.Update(self.target, feedback_fn)
15362 class LUTagsDel(TagsLU):
15363 """Delete a list of tags from a given object.
15365 """
15366 REQ_BGL = False
15368 def CheckPrereq(self):
15369 """Check prerequisites.
15371 This checks that we have the given tag.
15373 """
15374 TagsLU.CheckPrereq(self)
15375 for tag in self.op.tags:
15376 objects.TaggableObject.ValidateTag(tag)
15377 del_tags = frozenset(self.op.tags)
15378 cur_tags = self.target.GetTags()
15380 diff_tags = del_tags - cur_tags
15381 if diff_tags:
15382 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15383 raise errors.OpPrereqError("Tag(s) %s not found" %
15384 (utils.CommaJoin(diff_names), ),
15385 errors.ECODE_NOENT)
15387 def Exec(self, feedback_fn):
15388 """Remove the tag from the object.
15390 """
15391 for tag in self.op.tags:
15392 self.target.RemoveTag(tag)
15393 self.cfg.Update(self.target, feedback_fn)
15396 class LUTestDelay(NoHooksLU):
15397 """Sleep for a specified amount of time.
15399 This LU sleeps on the master and/or nodes for a specified amount of
15400 time.
15402 """
15403 REQ_BGL = False
15405 def ExpandNames(self):
15406 """Expand names and set required locks.
15408 This expands the node list, if any.
15410 """
15411 self.needed_locks = {}
15412 if self.op.on_nodes:
15413 # _GetWantedNodes can be used here, but is not always appropriate to use
15414 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15415 # more information.
15416 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15417 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15419 def _TestDelay(self):
15420 """Do the actual sleep.
15422 """
15423 if self.op.on_master:
15424 if not utils.TestDelay(self.op.duration):
15425 raise errors.OpExecError("Error during master delay test")
15426 if self.op.on_nodes:
15427 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15428 for node, node_result in result.items():
15429 node_result.Raise("Failure during rpc call to node %s" % node)
15431 def Exec(self, feedback_fn):
15432 """Execute the test delay opcode, with the wanted repetitions.
15434 """
15435 if self.op.repeat == 0:
15436 self._TestDelay()
15437 else:
15438 top_value = self.op.repeat - 1
15439 for i in range(self.op.repeat):
15440 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15441 self._TestDelay()
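# A minimal sketch of driving this LU (assumption: the OpTestDelay fields
# shown here match the opcode definition): sleep two seconds on the master
# only, without repetitions.
def _ExampleTestDelayOpcode():
  return opcodes.OpTestDelay(duration=2.0, on_master=True, on_nodes=[],
                             repeat=0)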
15444 class LURestrictedCommand(NoHooksLU):
15445 """Logical unit for executing restricted commands.
15447 """
15448 REQ_BGL = False
15450 def ExpandNames(self):
15451 if self.op.nodes:
15452 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15454 self.needed_locks = {
15455 locking.LEVEL_NODE: self.op.nodes,
15456 }
15457 self.share_locks = {
15458 locking.LEVEL_NODE: not self.op.use_locking,
15459 }
15461 def CheckPrereq(self):
15462 """Check prerequisites.
15464 """
15466 def Exec(self, feedback_fn):
15467 """Execute restricted command and return output.
15469 """
15470 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15472 # Check if correct locks are held
15473 assert set(self.op.nodes).issubset(owned_nodes)
15475 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15477 result = []
15479 for node_name in self.op.nodes:
15480 nres = rpcres[node_name]
15481 if nres.fail_msg:
15482 msg = ("Command '%s' on node '%s' failed: %s" %
15483 (self.op.command, node_name, nres.fail_msg))
15484 result.append((False, msg))
15485 else:
15486 result.append((True, nres.payload))
15488 return result
15491 class LUTestJqueue(NoHooksLU):
15492 """Utility LU to test some aspects of the job queue.
15494 """
15495 REQ_BGL = False
15497 # Must be lower than default timeout for WaitForJobChange to see whether it
15498 # notices changed jobs
15499 _CLIENT_CONNECT_TIMEOUT = 20.0
15500 _CLIENT_CONFIRM_TIMEOUT = 60.0
15502 @classmethod
15503 def _NotifyUsingSocket(cls, cb, errcls):
15504 """Opens a Unix socket and waits for another program to connect.
15506 @type cb: callable
15507 @param cb: Callback to send socket name to client
15508 @type errcls: class
15509 @param errcls: Exception class to use for errors
15511 """
15512 # Using a temporary directory as there's no easy way to create temporary
15513 # sockets without writing a custom loop around tempfile.mktemp and
15514 # socket.bind
15515 tmpdir = tempfile.mkdtemp()
15516 try:
15517 tmpsock = utils.PathJoin(tmpdir, "sock")
15519 logging.debug("Creating temporary socket at %s", tmpsock)
15520 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15521 try:
15522 sock.bind(tmpsock)
15523 sock.listen(1)
15525 # Send details to client
15526 cb(tmpsock)
15528 # Wait for client to connect before continuing
15529 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15530 try:
15531 (conn, _) = sock.accept()
15532 except socket.error, err:
15533 raise errcls("Client didn't connect in time (%s)" % err)
15534 finally:
15535 sock.close()
15536 finally:
15537 # Remove as soon as client is connected
15538 shutil.rmtree(tmpdir)
15540 # Wait for client to close
15541 try:
15542 try:
15543 # pylint: disable=E1101
15544 # Instance of '_socketobject' has no ... member
15545 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15546 conn.recv(1)
15547 except socket.error, err:
15548 raise errcls("Client failed to confirm notification (%s)" % err)
15549 finally:
15550 conn.close()
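# Hedged sketch of the client side this handshake expects (hypothetical
# helper, for illustration only; assumes the module-level "socket" import
# used above): connect to the announced socket path, then close the
# connection once the test event has been observed, which unblocks
# conn.recv(1) above.
def _ExampleJqueueTestClient(sockname):
  client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  client.connect(sockname)
  client.close()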
15552 def _SendNotification(self, test, arg, sockname):
15553 """Sends a notification to the client.
15555 @type test: string
15556 @param test: Test name
15557 @param arg: Test argument (depends on test)
15558 @type sockname: string
15559 @param sockname: Socket path
15561 """
15562 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15564 def _Notify(self, prereq, test, arg):
15565 """Notifies the client of a test.
15567 @type prereq: bool
15568 @param prereq: Whether this is a prereq-phase test
15569 @type test: string
15570 @param test: Test name
15571 @param arg: Test argument (depends on test)
15573 """
15574 if prereq:
15575 errcls = errors.OpPrereqError
15576 else:
15577 errcls = errors.OpExecError
15579 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15580 test, arg),
15581 errcls)
15583 def CheckArguments(self):
15584 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15585 self.expandnames_calls = 0
15587 def ExpandNames(self):
15588 checkargs_calls = getattr(self, "checkargs_calls", 0)
15589 if checkargs_calls < 1:
15590 raise errors.ProgrammerError("CheckArguments was not called")
15592 self.expandnames_calls += 1
15594 if self.op.notify_waitlock:
15595 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15597 self.LogInfo("Expanding names")
15599 # Get lock on master node (just to get a lock, not for a particular reason)
15600 self.needed_locks = {
15601 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15602 }
15604 def Exec(self, feedback_fn):
15605 if self.expandnames_calls < 1:
15606 raise errors.ProgrammerError("ExpandNames was not called")
15608 if self.op.notify_exec:
15609 self._Notify(False, constants.JQT_EXEC, None)
15611 self.LogInfo("Executing")
15613 if self.op.log_messages:
15614 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15615 for idx, msg in enumerate(self.op.log_messages):
15616 self.LogInfo("Sending log message %s", idx + 1)
15617 feedback_fn(constants.JQT_MSGPREFIX + msg)
15618 # Report how many test messages have been sent
15619 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15621 if self.op.fail:
15622 raise errors.OpExecError("Opcode failure was requested")
15624 return True
15627 class LUTestAllocator(NoHooksLU):
15628 """Run allocator tests.
15630 This LU runs the allocator tests.
15632 """
15633 def CheckPrereq(self):
15634 """Check prerequisites.
15636 This checks the opcode parameters depending on the direction and mode test.
15638 """
15639 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15640 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15641 for attr in ["memory", "disks", "disk_template",
15642 "os", "tags", "nics", "vcpus"]:
15643 if not hasattr(self.op, attr):
15644 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15645 attr, errors.ECODE_INVAL)
15646 iname = self.cfg.ExpandInstanceName(self.op.name)
15647 if iname is not None:
15648 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15649 iname, errors.ECODE_EXISTS)
15650 if not isinstance(self.op.nics, list):
15651 raise errors.OpPrereqError("Invalid parameter 'nics'",
15652 errors.ECODE_INVAL)
15653 if not isinstance(self.op.disks, list):
15654 raise errors.OpPrereqError("Invalid parameter 'disks'",
15655 errors.ECODE_INVAL)
15656 for row in self.op.disks:
15657 if (not isinstance(row, dict) or
15658 constants.IDISK_SIZE not in row or
15659 not isinstance(row[constants.IDISK_SIZE], int) or
15660 constants.IDISK_MODE not in row or
15661 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15662 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15663 " parameter", errors.ECODE_INVAL)
15664 if self.op.hypervisor is None:
15665 self.op.hypervisor = self.cfg.GetHypervisorType()
15666 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15667 fname = _ExpandInstanceName(self.cfg, self.op.name)
15668 self.op.name = fname
15669 self.relocate_from = \
15670 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15671 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15672 constants.IALLOCATOR_MODE_NODE_EVAC):
15673 if not self.op.instances:
15674 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15675 self.op.instances = _GetWantedInstances(self, self.op.instances)
15676 else:
15677 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15678 self.op.mode, errors.ECODE_INVAL)
15680 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15681 if self.op.iallocator is None:
15682 raise errors.OpPrereqError("Missing allocator name",
15683 errors.ECODE_INVAL)
15684 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15685 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15686 self.op.direction, errors.ECODE_INVAL)
15688 def Exec(self, feedback_fn):
15689 """Run the allocator test.
15691 """
15692 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15693 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15694 memory=self.op.memory,
15695 disks=self.op.disks,
15696 disk_template=self.op.disk_template,
15697 os=self.op.os,
15698 tags=self.op.tags,
15699 nics=self.op.nics,
15700 vcpus=self.op.vcpus,
15701 spindle_use=self.op.spindle_use,
15702 hypervisor=self.op.hypervisor)
15703 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15704 req = iallocator.IAReqRelocate(name=self.op.name,
15705 relocate_from=list(self.relocate_from))
15706 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15707 req = iallocator.IAReqGroupChange(instances=self.op.instances,
15708 target_groups=self.op.target_groups)
15709 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15710 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15711 evac_mode=self.op.evac_mode)
15712 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15713 disk_template = self.op.disk_template
15714 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15715 memory=self.op.memory,
15716 disks=self.op.disks,
15717 disk_template=disk_template,
15718 os=self.op.os,
15719 tags=self.op.tags,
15720 nics=self.op.nics,
15721 vcpus=self.op.vcpus,
15722 spindle_use=self.op.spindle_use,
15723 hypervisor=self.op.hypervisor)
15724 for idx in range(self.op.count)]
15725 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15726 else:
15727 raise errors.ProgrammerError("Unhandled mode %s in"
15728 " LUTestAllocator.Exec", self.op.mode)
15730 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15731 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15732 result = ial.in_text
15733 else:
15734 ial.Run(self.op.iallocator, validate=False)
15735 result = ial.out_text
15737 return result
15739 class LUNetworkAdd(LogicalUnit):
15740 """Logical unit for creating networks.
15742 """
15743 HPATH = "network-add"
15744 HTYPE = constants.HTYPE_NETWORK
15745 REQ_BGL = False
15747 def BuildHooksNodes(self):
15748 """Build hooks nodes.
15750 """
15751 mn = self.cfg.GetMasterNode()
15752 return ([mn], [mn])
15754 def CheckArguments(self):
15755 if self.op.mac_prefix:
15756 self.op.mac_prefix = \
15757 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
15759 def ExpandNames(self):
15760 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15762 if self.op.conflicts_check:
15763 self.share_locks[locking.LEVEL_NODE] = 1
15764 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
15765 self.needed_locks = {
15766 locking.LEVEL_NODE: locking.ALL_SET,
15767 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
15768 }
15769 else:
15770 self.needed_locks = {}
15772 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15774 def CheckPrereq(self):
15775 if self.op.network is None:
15776 raise errors.OpPrereqError("Network must be given",
15777 errors.ECODE_INVAL)
15779 uuid = self.cfg.LookupNetwork(self.op.network_name)
15781 if uuid:
15782 raise errors.OpPrereqError("Network '%s' already defined" %
15783 self.op.network, errors.ECODE_EXISTS)
15785 # Check tag validity
15786 for tag in self.op.tags:
15787 objects.TaggableObject.ValidateTag(tag)
15789 def BuildHooksEnv(self):
15790 """Build hooks env.
15792 """
15793 args = {
15794 "name": self.op.network_name,
15795 "subnet": self.op.network,
15796 "gateway": self.op.gateway,
15797 "network6": self.op.network6,
15798 "gateway6": self.op.gateway6,
15799 "mac_prefix": self.op.mac_prefix,
15800 "network_type": self.op.network_type,
15801 "tags": self.op.tags,
15802 }
15803 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15805 def Exec(self, feedback_fn):
15806 """Add the ip pool to the cluster.
15808 """
15809 nobj = objects.Network(name=self.op.network_name,
15810 network=self.op.network,
15811 gateway=self.op.gateway,
15812 network6=self.op.network6,
15813 gateway6=self.op.gateway6,
15814 mac_prefix=self.op.mac_prefix,
15815 network_type=self.op.network_type,
15816 uuid=self.network_uuid,
15817 family=constants.IP4_VERSION)
15818 # Initialize the associated address pool
15819 try:
15820 pool = network.AddressPool.InitializeNetwork(nobj)
15821 except errors.AddressPoolError, e:
15822 raise errors.OpExecError("Cannot create IP pool for this network: %s" % e)
15824 # Check if we need to reserve the nodes and the cluster master IP
15825 # These may not be allocated to any instances in routed mode, as
15826 # they wouldn't function anyway.
15827 if self.op.conflicts_check:
15828 for node in self.cfg.GetAllNodesInfo().values():
15829 for ip in [node.primary_ip, node.secondary_ip]:
15830 try:
15831 if pool.Contains(ip):
15832 pool.Reserve(ip)
15833 self.LogInfo("Reserved IP address of node '%s' (%s)",
15834 node.name, ip)
15835 except errors.AddressPoolError:
15836 self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
15837 node.name, ip)
15839 master_ip = self.cfg.GetClusterInfo().master_ip
15840 try:
15841 if pool.Contains(master_ip):
15842 pool.Reserve(master_ip)
15843 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
15844 except errors.AddressPoolError:
15845 self.LogWarning("Cannot reserve cluster master IP address (%s)",
15846 master_ip)
15848 if self.op.add_reserved_ips:
15849 for ip in self.op.add_reserved_ips:
15850 try:
15851 pool.Reserve(ip, external=True)
15852 except errors.AddressPoolError, e:
15853 raise errors.OpExecError("Cannot reserve IP %s: %s" % (ip, e))
15855 if self.op.tags:
15856 for tag in self.op.tags:
15857 nobj.AddTag(tag)
15859 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15860 del self.remove_locks[locking.LEVEL_NETWORK]
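# A small sketch of the address-pool pattern used above, reusing only methods
# that appear in this module (Contains/IsReserved/Reserve/GetMap); "nobj" is
# assumed to be an already-configured network object.
def _ExampleReserveIp(nobj, ip):
  """Reserves one address in a network's pool if it is free, returns the map."""
  pool = network.AddressPool(nobj)
  if pool.Contains(ip) and not pool.IsReserved(ip):
    pool.Reserve(ip, external=True)
  return pool.GetMap()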
15863 class LUNetworkRemove(LogicalUnit):
15864 HPATH = "network-remove"
15865 HTYPE = constants.HTYPE_NETWORK
15866 REQ_BGL = False
15868 def ExpandNames(self):
15869 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15871 if not self.network_uuid:
15872 raise errors.OpPrereqError(("Network '%s' not found" %
15873 self.op.network_name),
15874 errors.ECODE_INVAL)
15876 self.share_locks[locking.LEVEL_NODEGROUP] = 1
15877 self.needed_locks = {
15878 locking.LEVEL_NETWORK: [self.network_uuid],
15879 locking.LEVEL_NODEGROUP: locking.ALL_SET,
15880 }
15882 def CheckPrereq(self):
15883 """Check prerequisites.
15885 This checks that the given network name exists as a network and that it
15886 is not connected to any node group.
15888 """
15890 # Verify that the network is not connected.
15891 node_groups = [group.name
15892 for group in self.cfg.GetAllNodeGroupsInfo().values()
15893 if self.network_uuid in group.networks]
15895 if node_groups:
15896 self.LogWarning("Network '%s' is connected to the following"
15897 " node groups: %s" %
15898 (self.op.network_name,
15899 utils.CommaJoin(utils.NiceSort(node_groups))))
15900 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
15902 def BuildHooksEnv(self):
15903 """Build hooks env.
15905 """
15906 return {
15907 "NETWORK_NAME": self.op.network_name,
15908 }
15910 def BuildHooksNodes(self):
15911 """Build hooks nodes.
15913 """
15914 mn = self.cfg.GetMasterNode()
15915 return ([mn], [mn])
15917 def Exec(self, feedback_fn):
15918 """Remove the network.
15920 """
15921 try:
15922 self.cfg.RemoveNetwork(self.network_uuid)
15923 except errors.ConfigurationError:
15924 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15925 (self.op.network_name, self.network_uuid))
15928 class LUNetworkSetParams(LogicalUnit):
15929 """Modifies the parameters of a network.
15931 """
15932 HPATH = "network-modify"
15933 HTYPE = constants.HTYPE_NETWORK
15934 REQ_BGL = False
15936 def CheckArguments(self):
15937 if (self.op.gateway and
15938 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15939 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15940 " at once", errors.ECODE_INVAL)
15942 def ExpandNames(self):
15943 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15944 if self.network_uuid is None:
15945 raise errors.OpPrereqError(("Network '%s' not found" %
15946 self.op.network_name),
15947 errors.ECODE_INVAL)
15949 self.needed_locks = {
15950 locking.LEVEL_NETWORK: [self.network_uuid],
15951 }
15953 def CheckPrereq(self):
15954 """Check prerequisites.
15956 """
15957 self.network = self.cfg.GetNetwork(self.network_uuid)
15958 self.gateway = self.network.gateway
15959 self.network_type = self.network.network_type
15960 self.mac_prefix = self.network.mac_prefix
15961 self.network6 = self.network.network6
15962 self.gateway6 = self.network.gateway6
15963 self.tags = self.network.tags
15965 self.pool = network.AddressPool(self.network)
15967 if self.op.gateway:
15968 if self.op.gateway == constants.VALUE_NONE:
15969 self.gateway = None
15970 else:
15971 self.gateway = self.op.gateway
15972 if self.pool.IsReserved(self.gateway):
15973 raise errors.OpPrereqError("%s is already reserved" %
15974 self.gateway, errors.ECODE_INVAL)
15976 if self.op.network_type:
15977 if self.op.network_type == constants.VALUE_NONE:
15978 self.network_type = None
15979 else:
15980 self.network_type = self.op.network_type
15982 if self.op.mac_prefix:
15983 if self.op.mac_prefix == constants.VALUE_NONE:
15984 self.mac_prefix = None
15985 else:
15986 self.mac_prefix = \
15987 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
15989 if self.op.gateway6:
15990 if self.op.gateway6 == constants.VALUE_NONE:
15991 self.gateway6 = None
15992 else:
15993 self.gateway6 = self.op.gateway6
15995 if self.op.network6:
15996 if self.op.network6 == constants.VALUE_NONE:
15997 self.network6 = None
15998 else:
15999 self.network6 = self.op.network6
16001 def BuildHooksEnv(self):
16002 """Build hooks env.
16004 """
16005 args = {
16006 "name": self.op.network_name,
16007 "subnet": self.network.network,
16008 "gateway": self.gateway,
16009 "network6": self.network6,
16010 "gateway6": self.gateway6,
16011 "mac_prefix": self.mac_prefix,
16012 "network_type": self.network_type,
16013 "tags": self.tags,
16014 }
16015 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16017 def BuildHooksNodes(self):
16018 """Build hooks nodes.
16021 mn = self.cfg.GetMasterNode()
16022 return ([mn], [mn])
16024 def Exec(self, feedback_fn):
16025 """Modifies the network.
16027 """
16028 #TODO: reserve/release via temporary reservation manager
16029 # extend cfg.ReserveIp/ReleaseIp with the external flag
16030 if self.op.gateway:
16031 if self.gateway == self.network.gateway:
16032 self.LogWarning("Gateway is already %s", self.gateway)
16033 else:
16034 if self.gateway:
16035 self.pool.Reserve(self.gateway, external=True)
16036 if self.network.gateway:
16037 self.pool.Release(self.network.gateway, external=True)
16038 self.network.gateway = self.gateway
16040 if self.op.add_reserved_ips:
16041 for ip in self.op.add_reserved_ips:
16042 try:
16043 if self.pool.IsReserved(ip):
16044 self.LogWarning("IP address %s is already reserved", ip)
16045 else:
16046 self.pool.Reserve(ip, external=True)
16047 except errors.AddressPoolError, err:
16048 self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
16050 if self.op.remove_reserved_ips:
16051 for ip in self.op.remove_reserved_ips:
16052 if ip == self.network.gateway:
16053 self.LogWarning("Cannot unreserve Gateway's IP")
16054 continue
16055 try:
16056 if not self.pool.IsReserved(ip):
16057 self.LogWarning("IP address %s is already unreserved", ip)
16058 else:
16059 self.pool.Release(ip, external=True)
16060 except errors.AddressPoolError, err:
16061 self.LogWarning("Cannot release IP address %s: %s", ip, err)
16063 if self.op.mac_prefix:
16064 self.network.mac_prefix = self.mac_prefix
16066 if self.op.network6:
16067 self.network.network6 = self.network6
16069 if self.op.gateway6:
16070 self.network.gateway6 = self.gateway6
16072 if self.op.network_type:
16073 self.network.network_type = self.network_type
16075 self.pool.Validate()
16077 self.cfg.Update(self.network, feedback_fn)
16080 class _NetworkQuery(_QueryBase):
16081 FIELDS = query.NETWORK_FIELDS
16083 def ExpandNames(self, lu):
16084 lu.needed_locks = {}
16086 self._all_networks = lu.cfg.GetAllNetworksInfo()
16087 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
16089 if not self.names:
16090 self.wanted = [name_to_uuid[name]
16091 for name in utils.NiceSort(name_to_uuid.keys())]
16092 else:
16093 # Accept names to be either names or UUIDs.
16094 missing = []
16095 self.wanted = []
16096 all_uuid = frozenset(self._all_networks.keys())
16098 for name in self.names:
16099 if name in all_uuid:
16100 self.wanted.append(name)
16101 elif name in name_to_uuid:
16102 self.wanted.append(name_to_uuid[name])
16103 else:
16104 missing.append(name)
16106 if missing:
16107 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16108 errors.ECODE_NOENT)
16110 def DeclareLocks(self, lu, level):
16111 pass
16113 def _GetQueryData(self, lu):
16114 """Computes the list of networks and their attributes.
16116 """
16117 do_instances = query.NETQ_INST in self.requested_data
16118 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
16119 do_stats = query.NETQ_STATS in self.requested_data
16121 network_to_groups = None
16122 network_to_instances = None
16123 stats = None
16125 # For NETQ_GROUP, we need to map network->[groups]
16126 if do_groups:
16127 all_groups = lu.cfg.GetAllNodeGroupsInfo()
16128 network_to_groups = dict((uuid, []) for uuid in self.wanted)
16130 if do_instances:
16131 all_instances = lu.cfg.GetAllInstancesInfo()
16132 all_nodes = lu.cfg.GetAllNodesInfo()
16133 network_to_instances = dict((uuid, []) for uuid in self.wanted)
16135 for group in all_groups.values():
16136 if do_instances:
16137 group_nodes = [node.name for node in all_nodes.values() if
16138 node.group == group.uuid]
16138 node.group == group.uuid]
16139 group_instances = [instance for instance in all_instances.values()
16140 if instance.primary_node in group_nodes]
16142 for net_uuid in group.networks.keys():
16143 if net_uuid in network_to_groups:
16144 netparams = group.networks[net_uuid]
16145 mode = netparams[constants.NIC_MODE]
16146 link = netparams[constants.NIC_LINK]
16147 info = group.name + "(" + mode + ", " + link + ")"
16148 network_to_groups[net_uuid].append(info)
16150 if do_instances:
16151 for instance in group_instances:
16152 for nic in instance.nics:
16153 if nic.network == self._all_networks[net_uuid].name:
16154 network_to_instances[net_uuid].append(instance.name)
16155 break
16157 if do_stats:
16158 stats = {}
16159 for uuid, net in self._all_networks.items():
16160 if uuid in self.wanted:
16161 pool = network.AddressPool(net)
16162 stats[uuid] = {
16163 "free_count": pool.GetFreeCount(),
16164 "reserved_count": pool.GetReservedCount(),
16165 "map": pool.GetMap(),
16166 "external_reservations":
16167 utils.CommaJoin(pool.GetExternalReservations()),
16168 }
16170 return query.NetworkQueryData([self._all_networks[uuid]
16171 for uuid in self.wanted],
16172 network_to_groups,
16173 network_to_instances,
16174 stats)
16177 class LUNetworkQuery(NoHooksLU):
16178 """Logical unit for querying networks.
16180 """
16181 REQ_BGL = False
16183 def CheckArguments(self):
16184 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16185 self.op.output_fields, False)
16187 def ExpandNames(self):
16188 self.nq.ExpandNames(self)
16190 def Exec(self, feedback_fn):
16191 return self.nq.OldStyleQuery(self)
16194 class LUNetworkConnect(LogicalUnit):
16195 """Connect a network to a nodegroup.
16197 """
16198 HPATH = "network-connect"
16199 HTYPE = constants.HTYPE_NETWORK
16200 REQ_BGL = False
16202 def ExpandNames(self):
16203 self.network_name = self.op.network_name
16204 self.group_name = self.op.group_name
16205 self.network_mode = self.op.network_mode
16206 self.network_link = self.op.network_link
16208 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16209 if self.network_uuid is None:
16210 raise errors.OpPrereqError("Network %s does not exist" %
16211 self.network_name, errors.ECODE_INVAL)
16213 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16214 if self.group_uuid is None:
16215 raise errors.OpPrereqError("Group %s does not exist" %
16216 self.group_name, errors.ECODE_INVAL)
16218 self.needed_locks = {
16219 locking.LEVEL_INSTANCE: [],
16220 locking.LEVEL_NODEGROUP: [self.group_uuid],
16221 }
16222 self.share_locks[locking.LEVEL_INSTANCE] = 1
16224 if self.op.conflicts_check:
16225 self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
16226 self.share_locks[locking.LEVEL_NETWORK] = 1
16228 def DeclareLocks(self, level):
16229 if level == locking.LEVEL_INSTANCE:
16230 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16232 # Lock instances optimistically, needs verification once group lock has
16233 # been acquired
16234 if self.op.conflicts_check:
16235 self.needed_locks[locking.LEVEL_INSTANCE] = \
16236 self.cfg.GetNodeGroupInstances(self.group_uuid)
16238 def BuildHooksEnv(self):
16239 ret = {
16240 "GROUP_NAME": self.group_name,
16241 "GROUP_NETWORK_MODE": self.network_mode,
16242 "GROUP_NETWORK_LINK": self.network_link,
16243 }
16244 return ret
16246 def BuildHooksNodes(self):
16247 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16248 return (nodes, nodes)
16250 def CheckPrereq(self):
16251 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16253 assert self.group_uuid in owned_groups
16255 l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
16256 for i in value)
16258 self.netparams = {
16259 constants.NIC_MODE: self.network_mode,
16260 constants.NIC_LINK: self.network_link,
16261 }
16262 objects.NIC.CheckParameterSyntax(self.netparams)
16264 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16265 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16266 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16267 self.connected = False
16268 if self.network_uuid in self.group.networks:
16269 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16270 (self.network_name, self.group.name))
16271 self.connected = True
16272 return
16274 if self.op.conflicts_check:
16275 # Check if locked instances are still correct
16276 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16277 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16279 nobj = self.cfg.GetNetwork(self.network_uuid)
16280 pool = network.AddressPool(nobj)
16281 conflicting_instances = []
16283 for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
16284 for idx, nic in enumerate(instance.nics):
16285 if pool.Contains(nic.ip):
16286 conflicting_instances.append((instance.name, idx, nic.ip))
16288 if conflicting_instances:
16289 self.LogWarning("The following occurrences use IPs from network %s"
16290 " that is about to be connected to nodegroup %s: %s" %
16291 (self.network_name, self.group.name,
16292 l(conflicting_instances)))
16293 raise errors.OpPrereqError("Conflicting IPs found."
16294 " Please remove/modify"
16295 " corresponding NICs",
16296 errors.ECODE_INVAL)
16298 def Exec(self, feedback_fn):
16299 if self.connected:
16300 return
16302 self.group.networks[self.network_uuid] = self.netparams
16303 self.cfg.Update(self.group, feedback_fn)
16306 class LUNetworkDisconnect(LogicalUnit):
16307 """Disconnect a network from a nodegroup.
16309 """
16310 HPATH = "network-disconnect"
16311 HTYPE = constants.HTYPE_NETWORK
16312 REQ_BGL = False
16314 def ExpandNames(self):
16315 self.network_name = self.op.network_name
16316 self.group_name = self.op.group_name
16318 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16319 if self.network_uuid is None:
16320 raise errors.OpPrereqError("Network %s does not exist" %
16321 self.network_name, errors.ECODE_INVAL)
16323 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16324 if self.group_uuid is None:
16325 raise errors.OpPrereqError("Group %s does not exist" %
16326 self.group_name, errors.ECODE_INVAL)
16328 self.needed_locks = {
16329 locking.LEVEL_INSTANCE: [],
16330 locking.LEVEL_NODEGROUP: [self.group_uuid],
16331 }
16332 self.share_locks[locking.LEVEL_INSTANCE] = 1
16334 def DeclareLocks(self, level):
16335 if level == locking.LEVEL_INSTANCE:
16336 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16338 # Lock instances optimistically, needs verification once group lock has
16339 # been acquired
16340 if self.op.conflicts_check:
16341 self.needed_locks[locking.LEVEL_INSTANCE] = \
16342 self.cfg.GetNodeGroupInstances(self.group_uuid)
16344 def BuildHooksEnv(self):
16345 ret = {
16346 "GROUP_NAME": self.group_name,
16347 }
16348 return ret
16350 def BuildHooksNodes(self):
16351 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16352 return (nodes, nodes)
16354 def CheckPrereq(self):
16355 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16357 assert self.group_uuid in owned_groups
16359 l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
16360 for i in value)
16362 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16363 self.connected = True
16364 if self.network_uuid not in self.group.networks:
16365 self.LogWarning("Network '%s' is not mapped to group '%s'",
16366 self.network_name, self.group.name)
16367 self.connected = False
16368 return
16370 if self.op.conflicts_check:
16371 # Check if locked instances are still correct
16372 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16373 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16375 conflicting_instances = []
16377 for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
16378 for idx, nic in enumerate(instance.nics):
16379 if nic.network == self.network_name:
16380 conflicting_instances.append((instance.name, idx, nic.ip))
16382 if conflicting_instances:
16383 self.LogWarning("The following occurrences use IPs from network %s"
16384 " that is about to be disconnected from the nodegroup"
16385 " %s: %s" %
16386 (self.network_name, self.group.name,
16387 l(conflicting_instances)))
16388 raise errors.OpPrereqError("Conflicting IPs."
16389 " Please remove/modify"
16390 " corresponding NICs",
16391 errors.ECODE_INVAL)
16391 errors.ECODE_INVAL)
16393 def Exec(self, feedback_fn):
16394 if not self.connected:
16395 return
16397 del self.group.networks[self.network_uuid]
16398 self.cfg.Update(self.group, feedback_fn)
16401 #: Query type implementations
16402 _QUERY_IMPL = {
16403 constants.QR_CLUSTER: _ClusterQuery,
16404 constants.QR_INSTANCE: _InstanceQuery,
16405 constants.QR_NODE: _NodeQuery,
16406 constants.QR_GROUP: _GroupQuery,
16407 constants.QR_NETWORK: _NetworkQuery,
16408 constants.QR_OS: _OsQuery,
16409 constants.QR_EXPORT: _ExportQuery,
16410 }
16412 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16415 def _GetQueryImplementation(name):
16416 """Returns the implementation for a query type.
16418 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16420 """
16421 try:
16422 return _QUERY_IMPL[name]
16423 except KeyError:
16424 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16425 errors.ECODE_INVAL)
16428 def _CheckForConflictingIp(lu, ip, node):
16429 """Raise an error in case of a conflicting IP address.
16431 @type ip: string
16432 @param ip: ip address
16433 @type node: string
16434 @param node: node name
16436 """
16437 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16438 if conf_net is not None:
16439 raise errors.OpPrereqError("Conflicting IP found:"
16440 " %s <> %s." % (ip, conf_net),
16441 errors.ECODE_INVAL)
16443 return (None, None)
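# Usage sketch (hypothetical values): callers run this during prerequisite
# checks, e.g. _CheckForConflictingIp(self, "192.0.2.10", "node1.example.com"),
# relying on the raised OpPrereqError to abort the opcode early when the
# address belongs to a network already connected to the node's group.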