4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
78 ]))
81 class ResultWithJobs:
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode result.
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
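# Illustrative sketch (not part of the original module): an LU's Exec could hand
# follow-up work back to mcpu like this. OpTestDelay is only an example opcode;
# any list of lists of opcodes is handled the same way.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1)],      # first job, one opcode
#             [opcodes.OpTestDelay(duration=2)]]      # second, independent job
#     return ResultWithJobs(jobs, result="setup done")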
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these checks separately is better because:
176 - ExpandNames is left as a purely lock-related function
177 - CheckPrereq is run after we have acquired locks (and possibly waited for them)
180 The function is allowed to change the self.op attribute so that
181 later methods need no longer worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same time.
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
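# Hedged sketch (not part of the original file): a concurrent LU that wants all
# node locks in shared mode could implement ExpandNames roughly like this;
# LUFooExample is a made-up name used purely for illustration.
#
#   class LUFooExample(NoHooksLU):
#     def ExpandNames(self):
#       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
#       self.share_locks[locking.LEVEL_NODE] = 1   # acquire shared, not exclusive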
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
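# Hedged sketch (illustration only): an LU that locked an instance in
# ExpandNames can pick up that instance's node locks here, once the instance
# lock is actually held.
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes(primary_only=True)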
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. An empty node set should be returned as
318 an empty list (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the unused-argument and
345 # could-be-a-function warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
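# Hedged usage sketch (not from the original module): an instance-level LU
# would typically call the helper from ExpandNames and then defer its node
# locks to DeclareLocks, e.g.:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE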
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383 It should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we're really being called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
401 wanted_nodes = []
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
405 if not primary_only:
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
535 if missing:
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
598 "bootid": bootid,
599 })
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of its children!)
608 @param cfg: The config object
609 @return: The annotated disk copies
610 @see: L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
645 """Checks if the owned node groups are still correct for an instance.
647 @type cfg: L{config.ConfigWriter}
648 @param cfg: The cluster configuration
649 @type instance_name: string
650 @param instance_name: Instance name
651 @type owned_groups: set or frozenset
652 @param owned_groups: List of currently owned node groups
653 @type primary_only: boolean
654 @param primary_only: Whether to check node groups for only the primary node
657 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
659 if not owned_groups.issuperset(inst_groups):
660 raise errors.OpPrereqError("Instance %s's node groups changed since"
661 " locks were acquired, current groups are"
662 " are '%s', owning groups '%s'; retry the"
665 utils.CommaJoin(inst_groups),
666 utils.CommaJoin(owned_groups)),
672 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
673 """Checks if the instances in a node group are still correct.
675 @type cfg: L{config.ConfigWriter}
676 @param cfg: The cluster configuration
677 @type group_uuid: string
678 @param group_uuid: Node group UUID
679 @type owned_instances: set or frozenset
680 @param owned_instances: List of currently owned instances
683 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
684 if owned_instances != wanted_instances:
685 raise errors.OpPrereqError("Instances in node group '%s' changed since"
686 " locks were acquired, wanted '%s', have '%s';"
687 " retry the operation" %
689 utils.CommaJoin(wanted_instances),
690 utils.CommaJoin(owned_instances)),
693 return wanted_instances
696 def _SupportsOob(cfg, node):
697 """Tells if node supports OOB.
699 @type cfg: L{config.ConfigWriter}
700 @param cfg: The cluster configuration
701 @type node: L{objects.Node}
702 @param node: The node
703 @return: The OOB script if supported or an empty string otherwise
706 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
709 def _GetWantedNodes(lu, nodes):
710 """Returns list of checked and expanded node names.
712 @type lu: L{LogicalUnit}
713 @param lu: the logical unit on whose behalf we execute
715 @param nodes: list of node names or None for all nodes
717 @return: the list of nodes, sorted
718 @raise errors.ProgrammerError: if the nodes parameter is wrong type
722 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
724 return utils.NiceSort(lu.cfg.GetNodeList())
727 def _GetWantedInstances(lu, instances):
728 """Returns list of checked and expanded instance names.
730 @type lu: L{LogicalUnit}
731 @param lu: the logical unit on whose behalf we execute
732 @type instances: list
733 @param instances: list of instance names or None for all instances
735 @return: the list of instances, sorted
736 @raise errors.OpPrereqError: if the instances parameter is wrong type
737 @raise errors.OpPrereqError: if any of the passed instances is not found
741 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
743 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
747 def _GetUpdatedParams(old_params, update_dict,
748 use_default=True, use_none=False):
749 """Return the new version of a parameter dictionary.
751 @type old_params: dict
752 @param old_params: old parameters
753 @type update_dict: dict
754 @param update_dict: dict containing new parameter values, or
755 constants.VALUE_DEFAULT to reset the parameter to its default
757 @type use_default: boolean
758 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
759 values as 'to be deleted' values
760 @type use_none: boolean
761 @param use_none: whether to recognise C{None} values as 'to be deleted' values
764 @return: the new parameter dictionary
767 params_copy = copy.deepcopy(old_params)
768 for key, val in update_dict.iteritems():
769 if ((use_default and val == constants.VALUE_DEFAULT) or
770 (use_none and val is None)):
776 params_copy[key] = val
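# Hedged usage sketch (the parameter dicts below are made up): passing
# constants.VALUE_DEFAULT for a key drops it from the result so the cluster
# default applies again, while any other key is simply overwritten.
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}
#   new = _GetUpdatedParams(old, {"root_path": constants.VALUE_DEFAULT,
#                                 "serial_console": True})
#   # new == {"kernel_path": "/boot/vmlinuz", "serial_console": True}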
780 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
781 """Return the new version of a instance policy.
783 @param group_policy: whether this policy applies to a group and thus
784 we should support removal of policy entries
787 use_none = use_default = group_policy
788 ipolicy = copy.deepcopy(old_ipolicy)
789 for key, value in new_ipolicy.items():
790 if key not in constants.IPOLICY_ALL_KEYS:
791 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
793 if key in constants.IPOLICY_ISPECS:
794 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
795 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
797 use_default=use_default)
799 if (not value or value == [constants.VALUE_DEFAULT] or
800 value == constants.VALUE_DEFAULT):
804 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
805 " on the cluster'" % key,
808 if key in constants.IPOLICY_PARAMETERS:
809 # FIXME: we assume all such values are float
811 ipolicy[key] = float(value)
812 except (TypeError, ValueError), err:
813 raise errors.OpPrereqError("Invalid value for attribute"
814 " '%s': '%s', error: %s" %
815 (key, value, err), errors.ECODE_INVAL)
817 # FIXME: we assume all others are lists; this should be redone
819 ipolicy[key] = list(value)
821 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
822 except errors.ConfigurationError, err:
823 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
828 def _UpdateAndVerifySubDict(base, updates, type_check):
829 """Updates and verifies a dict with sub dicts of the same type.
831 @param base: The dict with the old data
832 @param updates: The dict with the new data
833 @param type_check: Dict suitable to ForceDictType to verify correct types
834 @returns: A new dict with updated and verified values
838 new = _GetUpdatedParams(old, value)
839 utils.ForceDictType(new, type_check)
842 ret = copy.deepcopy(base)
843 ret.update(dict((key, fn(base.get(key, {}), value))
844 for key, value in updates.items()))
848 def _MergeAndVerifyHvState(op_input, obj_input):
849 """Combines the hv state from an opcode with the one of the object
851 @param op_input: The input dict from the opcode
852 @param obj_input: The input dict from the objects
853 @return: The verified and updated dict
857 invalid_hvs = set(op_input) - constants.HYPER_TYPES
859 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
860 " %s" % utils.CommaJoin(invalid_hvs),
862 if obj_input is None:
864 type_check = constants.HVSTS_PARAMETER_TYPES
865 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
870 def _MergeAndVerifyDiskState(op_input, obj_input):
871 """Combines the disk state from an opcode with the one of the object
873 @param op_input: The input dict from the opcode
874 @param obj_input: The input dict from the objects
875 @return: The verified and updated dict
878 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
880 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
881 utils.CommaJoin(invalid_dst),
883 type_check = constants.DSS_PARAMETER_TYPES
884 if obj_input is None:
886 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
888 for key, value in op_input.items())
893 def _ReleaseLocks(lu, level, names=None, keep=None):
894 """Releases locks owned by an LU.
896 @type lu: L{LogicalUnit}
897 @param level: Lock level
898 @type names: list or None
899 @param names: Names of locks to release
900 @type keep: list or None
901 @param keep: Names of locks to retain
904 assert not (keep is not None and names is not None), \
905 "Only one of the 'names' and the 'keep' parameters can be given"
907 if names is not None:
908 should_release = names.__contains__
909 elif keep is not None:
910 should_release = lambda name: name not in keep
911 else:
912 should_release = None
914 owned = lu.owned_locks(level)
916 # Not owning any lock at this level, do nothing
923 # Determine which locks to release
925 if should_release(name):
930 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
932 # Release just some locks
933 lu.glm.release(level, names=release)
935 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
938 lu.glm.release(level)
940 assert not lu.glm.is_owned(level), "No locks should be owned"
943 def _MapInstanceDisksToNodes(instances):
944 """Creates a map from (node, volume) to instance name.
946 @type instances: list of L{objects.Instance}
947 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
950 return dict(((node, vol), inst.name)
951 for inst in instances
952 for (node, vols) in inst.MapLVsByNode().items()
953 for vol in vols)
956 def _RunPostHook(lu, node_name):
957 """Runs the post-hook for an opcode on a single node.
960 hm = lu.proc.BuildHooksManager(lu)
961 try:
962 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
963 except:
964 # pylint: disable=W0702
965 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
968 def _CheckOutputFields(static, dynamic, selected):
969 """Checks whether all selected fields are valid.
971 @type static: L{utils.FieldSet}
972 @param static: static fields set
973 @type dynamic: L{utils.FieldSet}
974 @param dynamic: dynamic fields set
981 delta = f.NonMatching(selected)
983 raise errors.OpPrereqError("Unknown output fields selected: %s"
984 % ",".join(delta), errors.ECODE_INVAL)
987 def _CheckGlobalHvParams(params):
988 """Validates that given hypervisor params are not global ones.
990 This will ensure that instances don't get customised versions of global parameters.
994 used_globals = constants.HVC_GLOBALS.intersection(params)
996 msg = ("The following hypervisor parameters are global and cannot"
997 " be customized at instance level, please modify them at"
998 " cluster level: %s" % utils.CommaJoin(used_globals))
999 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1002 def _CheckNodeOnline(lu, node, msg=None):
1003 """Ensure that a given node is online.
1005 @param lu: the LU on behalf of which we make the check
1006 @param node: the node to check
1007 @param msg: if passed, should be a message to replace the default one
1008 @raise errors.OpPrereqError: if the node is offline
1011 if msg is None:
1012 msg = "Can't use offline node"
1013 if lu.cfg.GetNodeInfo(node).offline:
1014 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1017 def _CheckNodeNotDrained(lu, node):
1018 """Ensure that a given node is not drained.
1020 @param lu: the LU on behalf of which we make the check
1021 @param node: the node to check
1022 @raise errors.OpPrereqError: if the node is drained
1025 if lu.cfg.GetNodeInfo(node).drained:
1026 raise errors.OpPrereqError("Can't use drained node %s" % node,
1030 def _CheckNodeVmCapable(lu, node):
1031 """Ensure that a given node is vm capable.
1033 @param lu: the LU on behalf of which we make the check
1034 @param node: the node to check
1035 @raise errors.OpPrereqError: if the node is not vm capable
1038 if not lu.cfg.GetNodeInfo(node).vm_capable:
1039 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1043 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1044 """Ensure that a node supports a given OS.
1046 @param lu: the LU on behalf of which we make the check
1047 @param node: the node to check
1048 @param os_name: the OS to query about
1049 @param force_variant: whether to ignore variant errors
1050 @raise errors.OpPrereqError: if the node is not supporting the OS
1053 result = lu.rpc.call_os_get(node, os_name)
1054 result.Raise("OS '%s' not in supported OS list for node %s" %
1056 prereq=True, ecode=errors.ECODE_INVAL)
1057 if not force_variant:
1058 _CheckOSVariant(result.payload, os_name)
1061 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1062 """Ensure that a node has the given secondary ip.
1064 @type lu: L{LogicalUnit}
1065 @param lu: the LU on behalf of which we make the check
1067 @param node: the node to check
1068 @type secondary_ip: string
1069 @param secondary_ip: the ip to check
1070 @type prereq: boolean
1071 @param prereq: whether to throw a prerequisite or an execute error
1072 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1073 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1076 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1077 result.Raise("Failure checking secondary ip on node %s" % node,
1078 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1079 if not result.payload:
1080 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1081 " please fix and re-run this command" % secondary_ip)
1083 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1085 raise errors.OpExecError(msg)
1088 def _GetClusterDomainSecret():
1089 """Reads the cluster domain secret.
1092 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1096 def _CheckInstanceState(lu, instance, req_states, msg=None):
1097 """Ensure that an instance is in one of the required states.
1099 @param lu: the LU on behalf of which we make the check
1100 @param instance: the instance to check
1101 @param msg: if passed, should be a message to replace the default one
1102 @raise errors.OpPrereqError: if the instance is not in the required state
1105 if msg is None:
1106 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1107 if instance.admin_state not in req_states:
1108 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1109 (instance.name, instance.admin_state, msg),
1112 if constants.ADMINST_UP not in req_states:
1113 pnode = instance.primary_node
1114 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1115 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1116 prereq=True, ecode=errors.ECODE_ENVIRON)
1118 if instance.name in ins_l.payload:
1119 raise errors.OpPrereqError("Instance %s is running, %s" %
1120 (instance.name, msg), errors.ECODE_STATE)
1123 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1124 """Computes if value is in the desired range.
1126 @param name: name of the parameter for which we perform the check
1127 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1129 @param ipolicy: dictionary containing min, max and std values
1130 @param value: actual value that we want to use
1131 @return: None or element not meeting the criteria
1135 if value in [None, constants.VALUE_AUTO]:
1136 return None
1137 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1138 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1139 if value > max_v or min_v > value:
1141 fqn = "%s/%s" % (name, qualifier)
1144 return ("%s value %s is not in range [%s, %s]" %
1145 (fqn, value, min_v, max_v))
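# Hedged usage sketch (the policy dict is made up for illustration): with a
# memory range of 128-4096 MB, a request for 8192 MB is reported as violating
# the policy; a value inside the range yields None.
#
#   ipolicy = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#              constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096}}
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 8192)
#   # -> an out-of-range message naming the spec and the [128, 4096] bounds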
1149 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1150 nic_count, disk_sizes, spindle_use,
1151 _compute_fn=_ComputeMinMaxSpec):
1152 """Verifies ipolicy against provided specs.
1155 @param ipolicy: The ipolicy
1157 @param mem_size: The memory size
1158 @type cpu_count: int
1159 @param cpu_count: Used cpu cores
1160 @type disk_count: int
1161 @param disk_count: Number of disks used
1162 @type nic_count: int
1163 @param nic_count: Number of nics used
1164 @type disk_sizes: list of ints
1165 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1166 @type spindle_use: int
1167 @param spindle_use: The number of spindles this instance uses
1168 @param _compute_fn: The compute function (unittest only)
1169 @return: A list of violations, or an empty list if no violations are found
1172 assert disk_count == len(disk_sizes)
1175 (constants.ISPEC_MEM_SIZE, "", mem_size),
1176 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1177 (constants.ISPEC_DISK_COUNT, "", disk_count),
1178 (constants.ISPEC_NIC_COUNT, "", nic_count),
1179 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1180 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1181 for idx, d in enumerate(disk_sizes)]
1183 return filter(None,
1184 (_compute_fn(name, qualifier, ipolicy, value)
1185 for (name, qualifier, value) in test_settings))
1188 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1189 _compute_fn=_ComputeIPolicySpecViolation):
1190 """Compute if instance meets the specs of ipolicy.
1193 @param ipolicy: The ipolicy to verify against
1194 @type instance: L{objects.Instance}
1195 @param instance: The instance to verify
1196 @param _compute_fn: The function to verify ipolicy (unittest only)
1197 @see: L{_ComputeIPolicySpecViolation}
1200 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1201 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1202 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1203 disk_count = len(instance.disks)
1204 disk_sizes = [disk.size for disk in instance.disks]
1205 nic_count = len(instance.nics)
1207 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1208 disk_sizes, spindle_use)
1211 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1212 _compute_fn=_ComputeIPolicySpecViolation):
1213 """Compute if instance specs meets the specs of ipolicy.
1216 @param ipolicy: The ipolicy to verify against
1217 @type instance_spec: dict
1218 @param instance_spec: The instance spec to verify
1219 @param _compute_fn: The function to verify ipolicy (unittest only)
1220 @see: L{_ComputeIPolicySpecViolation}
1223 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1224 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1225 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1226 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1227 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1228 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1230 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1231 disk_sizes, spindle_use)
1234 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1236 _compute_fn=_ComputeIPolicyInstanceViolation):
1237 """Compute if instance meets the specs of the new target group.
1239 @param ipolicy: The ipolicy to verify
1240 @param instance: The instance object to verify
1241 @param current_group: The current group of the instance
1242 @param target_group: The new group of the instance
1243 @param _compute_fn: The function to verify ipolicy (unittest only)
1244 @see: L{_ComputeIPolicySpecViolation}
1247 if current_group == target_group:
1248 return []
1249 else:
1250 return _compute_fn(ipolicy, instance)
1253 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1254 _compute_fn=_ComputeIPolicyNodeViolation):
1255 """Checks that the target node is correct in terms of instance policy.
1257 @param ipolicy: The ipolicy to verify
1258 @param instance: The instance object to verify
1259 @param node: The new node to relocate
1260 @param ignore: Ignore violations of the ipolicy
1261 @param _compute_fn: The function to verify ipolicy (unittest only)
1262 @see: L{_ComputeIPolicySpecViolation}
1265 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1266 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1269 msg = ("Instance does not meet target node group's (%s) instance"
1270 " policy: %s") % (node.group, utils.CommaJoin(res))
1274 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1277 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1278 """Computes a set of any instances that would violate the new ipolicy.
1280 @param old_ipolicy: The current (still in-place) ipolicy
1281 @param new_ipolicy: The new (to become) ipolicy
1282 @param instances: List of instances to verify
1283 @return: A list of instances which violate the new ipolicy but not the old one
1287 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1288 _ComputeViolatingInstances(old_ipolicy, instances))
1291 def _ExpandItemName(fn, name, kind):
1292 """Expand an item name.
1294 @param fn: the function to use for expansion
1295 @param name: requested item name
1296 @param kind: text description ('Node' or 'Instance')
1297 @return: the resolved (full) name
1298 @raise errors.OpPrereqError: if the item is not found
1301 full_name = fn(name)
1302 if full_name is None:
1303 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1308 def _ExpandNodeName(cfg, name):
1309 """Wrapper over L{_ExpandItemName} for nodes."""
1310 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1313 def _ExpandInstanceName(cfg, name):
1314 """Wrapper over L{_ExpandItemName} for instance."""
1315 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1318 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1319 minmem, maxmem, vcpus, nics, disk_template, disks,
1320 bep, hvp, hypervisor_name, tags):
1321 """Builds instance related env variables for hooks
1323 This builds the hook environment from individual variables.
1326 @param name: the name of the instance
1327 @type primary_node: string
1328 @param primary_node: the name of the instance's primary node
1329 @type secondary_nodes: list
1330 @param secondary_nodes: list of secondary nodes as strings
1331 @type os_type: string
1332 @param os_type: the name of the instance's OS
1333 @type status: string
1334 @param status: the desired status of the instance
1335 @type minmem: string
1336 @param minmem: the minimum memory size of the instance
1337 @type maxmem: string
1338 @param maxmem: the maximum memory size of the instance
1340 @param vcpus: the count of VCPUs the instance has
1342 @param nics: list of tuples (ip, mac, mode, link) representing
1343 the NICs the instance has
1344 @type disk_template: string
1345 @param disk_template: the disk template of the instance
1347 @param disks: the list of (size, mode) pairs
1349 @param bep: the backend parameters for the instance
1351 @param hvp: the hypervisor parameters for the instance
1352 @type hypervisor_name: string
1353 @param hypervisor_name: the hypervisor for the instance
1355 @param tags: list of instance tags as strings
1357 @return: the hook environment for this instance
1362 "INSTANCE_NAME": name,
1363 "INSTANCE_PRIMARY": primary_node,
1364 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1365 "INSTANCE_OS_TYPE": os_type,
1366 "INSTANCE_STATUS": status,
1367 "INSTANCE_MINMEM": minmem,
1368 "INSTANCE_MAXMEM": maxmem,
1369 # TODO(2.7) remove deprecated "memory" value
1370 "INSTANCE_MEMORY": maxmem,
1371 "INSTANCE_VCPUS": vcpus,
1372 "INSTANCE_DISK_TEMPLATE": disk_template,
1373 "INSTANCE_HYPERVISOR": hypervisor_name,
1376 nic_count = len(nics)
1377 for idx, (ip, mac, mode, link) in enumerate(nics):
1380 env["INSTANCE_NIC%d_IP" % idx] = ip
1381 env["INSTANCE_NIC%d_MAC" % idx] = mac
1382 env["INSTANCE_NIC%d_MODE" % idx] = mode
1383 env["INSTANCE_NIC%d_LINK" % idx] = link
1384 if mode == constants.NIC_MODE_BRIDGED:
1385 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1389 env["INSTANCE_NIC_COUNT"] = nic_count
1392 disk_count = len(disks)
1393 for idx, (size, mode) in enumerate(disks):
1394 env["INSTANCE_DISK%d_SIZE" % idx] = size
1395 env["INSTANCE_DISK%d_MODE" % idx] = mode
1399 env["INSTANCE_DISK_COUNT"] = disk_count
1404 env["INSTANCE_TAGS"] = " ".join(tags)
1406 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1407 for key, value in source.items():
1408 env["INSTANCE_%s_%s" % (kind, key)] = value
1413 def _NICListToTuple(lu, nics):
1414 """Build a list of nic information tuples.
1416 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1417 value in LUInstanceQueryData.
1419 @type lu: L{LogicalUnit}
1420 @param lu: the logical unit on whose behalf we execute
1421 @type nics: list of L{objects.NIC}
1422 @param nics: list of nics to convert to hooks tuples
1426 cluster = lu.cfg.GetClusterInfo()
1430 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1431 mode = filled_params[constants.NIC_MODE]
1432 link = filled_params[constants.NIC_LINK]
1433 hooks_nics.append((ip, mac, mode, link))
1437 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1438 """Builds instance related env variables for hooks from an object.
1440 @type lu: L{LogicalUnit}
1441 @param lu: the logical unit on whose behalf we execute
1442 @type instance: L{objects.Instance}
1443 @param instance: the instance for which we should build the
1445 @type override: dict
1446 @param override: dictionary with key/values that will override
1449 @return: the hook environment dictionary
1452 cluster = lu.cfg.GetClusterInfo()
1453 bep = cluster.FillBE(instance)
1454 hvp = cluster.FillHV(instance)
1456 "name": instance.name,
1457 "primary_node": instance.primary_node,
1458 "secondary_nodes": instance.secondary_nodes,
1459 "os_type": instance.os,
1460 "status": instance.admin_state,
1461 "maxmem": bep[constants.BE_MAXMEM],
1462 "minmem": bep[constants.BE_MINMEM],
1463 "vcpus": bep[constants.BE_VCPUS],
1464 "nics": _NICListToTuple(lu, instance.nics),
1465 "disk_template": instance.disk_template,
1466 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1469 "hypervisor_name": instance.hypervisor,
1470 "tags": instance.tags,
1473 args.update(override)
1474 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1477 def _AdjustCandidatePool(lu, exceptions):
1478 """Adjust the candidate pool after node operations.
1481 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1483 lu.LogInfo("Promoted nodes to master candidate role: %s",
1484 utils.CommaJoin(node.name for node in mod_list))
1485 for name in mod_list:
1486 lu.context.ReaddNode(name)
1487 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1489 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1493 def _DecideSelfPromotion(lu, exceptions=None):
1494 """Decide whether I should promote myself as a master candidate.
1497 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1498 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1499 # the new node will increase mc_max with one, so:
1500 mc_should = min(mc_should + 1, cp_size)
1501 return mc_now < mc_should
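# Worked illustration (numbers invented): with candidate_pool_size = 10 and the
# stats reporting mc_now = 3 and mc_should = 4, adding this node gives
# mc_should = min(4 + 1, 10) = 5, and since 3 < 5 the node promotes itself.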
1504 def _CalculateGroupIPolicy(cluster, group):
1505 """Calculate instance policy for group.
1508 return cluster.SimpleFillIPolicy(group.ipolicy)
1511 def _ComputeViolatingInstances(ipolicy, instances):
1512 """Computes a set of instances who violates given ipolicy.
1514 @param ipolicy: The ipolicy to verify
1515 @type instances: object.Instance
1516 @param instances: List of instances to verify
1517 @return: A frozenset of instance names violating the ipolicy
1520 return frozenset([inst.name for inst in instances
1521 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1524 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1525 """Check that the brigdes needed by a list of nics exist.
1528 cluster = lu.cfg.GetClusterInfo()
1529 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1530 brlist = [params[constants.NIC_LINK] for params in paramslist
1531 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1533 result = lu.rpc.call_bridges_exist(target_node, brlist)
1534 result.Raise("Error checking bridges on destination node '%s'" %
1535 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1538 def _CheckInstanceBridgesExist(lu, instance, node=None):
1539 """Check that the brigdes needed by an instance exist.
1543 node = instance.primary_node
1544 _CheckNicsBridgesExist(lu, instance.nics, node)
1547 def _CheckOSVariant(os_obj, name):
1548 """Check whether an OS name conforms to the os variants specification.
1550 @type os_obj: L{objects.OS}
1551 @param os_obj: OS object to check
1553 @param name: OS name passed by the user, to check for validity
1556 variant = objects.OS.GetVariant(name)
1557 if not os_obj.supported_variants:
1559 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1560 " passed)" % (os_obj.name, variant),
1564 raise errors.OpPrereqError("OS name must include a variant",
1567 if variant not in os_obj.supported_variants:
1568 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1571 def _GetNodeInstancesInner(cfg, fn):
1572 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1575 def _GetNodeInstances(cfg, node_name):
1576 """Returns a list of all primary and secondary instances on a node.
1580 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1583 def _GetNodePrimaryInstances(cfg, node_name):
1584 """Returns primary instances on a node.
1587 return _GetNodeInstancesInner(cfg,
1588 lambda inst: node_name == inst.primary_node)
1591 def _GetNodeSecondaryInstances(cfg, node_name):
1592 """Returns secondary instances on a node.
1595 return _GetNodeInstancesInner(cfg,
1596 lambda inst: node_name in inst.secondary_nodes)
1599 def _GetStorageTypeArgs(cfg, storage_type):
1600 """Returns the arguments for a storage type.
1603 # Special case for file storage
1604 if storage_type == constants.ST_FILE:
1605 # storage.FileStorage wants a list of storage directories
1606 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1611 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1614 for dev in instance.disks:
1615 cfg.SetDiskID(dev, node_name)
1617 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1619 result.Raise("Failed to get disk status from node %s" % node_name,
1620 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1622 for idx, bdev_status in enumerate(result.payload):
1623 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1629 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1630 """Check the sanity of iallocator and node arguments and use the
1631 cluster-wide iallocator if appropriate.
1633 Check that at most one of (iallocator, node) is specified. If none is
1634 specified, then the LU's opcode's iallocator slot is filled with the
1635 cluster-wide default iallocator.
1637 @type iallocator_slot: string
1638 @param iallocator_slot: the name of the opcode iallocator slot
1639 @type node_slot: string
1640 @param node_slot: the name of the opcode target node slot
1643 node = getattr(lu.op, node_slot, None)
1644 iallocator = getattr(lu.op, iallocator_slot, None)
1646 if node is not None and iallocator is not None:
1647 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1649 elif node is None and iallocator is None:
1650 default_iallocator = lu.cfg.GetDefaultIAllocator()
1651 if default_iallocator:
1652 setattr(lu.op, iallocator_slot, default_iallocator)
1654 raise errors.OpPrereqError("No iallocator or node given and no"
1655 " cluster-wide default iallocator found;"
1656 " please specify either an iallocator or a"
1657 " node, or set a cluster-wide default"
1661 def _GetDefaultIAllocator(cfg, iallocator):
1662 """Decides on which iallocator to use.
1664 @type cfg: L{config.ConfigWriter}
1665 @param cfg: Cluster configuration object
1666 @type iallocator: string or None
1667 @param iallocator: Iallocator specified in opcode
1669 @return: Iallocator name
1673 # Use default iallocator
1674 iallocator = cfg.GetDefaultIAllocator()
1677 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1678 " opcode nor as a cluster-wide default",
1684 class LUClusterPostInit(LogicalUnit):
1685 """Logical unit for running hooks after cluster initialization.
1688 HPATH = "cluster-init"
1689 HTYPE = constants.HTYPE_CLUSTER
1691 def BuildHooksEnv(self):
1696 "OP_TARGET": self.cfg.GetClusterName(),
1699 def BuildHooksNodes(self):
1700 """Build hooks nodes.
1703 return ([], [self.cfg.GetMasterNode()])
1705 def Exec(self, feedback_fn):
1712 class LUClusterDestroy(LogicalUnit):
1713 """Logical unit for destroying the cluster.
1716 HPATH = "cluster-destroy"
1717 HTYPE = constants.HTYPE_CLUSTER
1719 def BuildHooksEnv(self):
1724 "OP_TARGET": self.cfg.GetClusterName(),
1727 def BuildHooksNodes(self):
1728 """Build hooks nodes.
1733 def CheckPrereq(self):
1734 """Check prerequisites.
1736 This checks whether the cluster is empty.
1738 Any errors are signaled by raising errors.OpPrereqError.
1741 master = self.cfg.GetMasterNode()
1743 nodelist = self.cfg.GetNodeList()
1744 if len(nodelist) != 1 or nodelist[0] != master:
1745 raise errors.OpPrereqError("There are still %d node(s) in"
1746 " this cluster." % (len(nodelist) - 1),
1748 instancelist = self.cfg.GetInstanceList()
1750 raise errors.OpPrereqError("There are still %d instance(s) in"
1751 " this cluster." % len(instancelist),
1754 def Exec(self, feedback_fn):
1755 """Destroys the cluster.
1758 master_params = self.cfg.GetMasterNetworkParameters()
1760 # Run post hooks on master node before it's removed
1761 _RunPostHook(self, master_params.name)
1763 ems = self.cfg.GetUseExternalMipScript()
1764 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1767 self.LogWarning("Error disabling the master IP address: %s",
1770 return master_params.name
1773 def _VerifyCertificate(filename):
1774 """Verifies a certificate for L{LUClusterVerifyConfig}.
1776 @type filename: string
1777 @param filename: Path to PEM file
1781 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1782 utils.ReadFile(filename))
1783 except Exception, err: # pylint: disable=W0703
1784 return (LUClusterVerifyConfig.ETYPE_ERROR,
1785 "Failed to load X509 certificate %s: %s" % (filename, err))
1788 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1789 constants.SSL_CERT_EXPIRATION_ERROR)
1792 fnamemsg = "While verifying %s: %s" % (filename, msg)
1797 return (None, fnamemsg)
1798 elif errcode == utils.CERT_WARNING:
1799 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1800 elif errcode == utils.CERT_ERROR:
1801 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1803 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1806 def _GetAllHypervisorParameters(cluster, instances):
1807 """Compute the set of all hypervisor parameters.
1809 @type cluster: L{objects.Cluster}
1810 @param cluster: the cluster object
1811 @type instances: list of L{objects.Instance}
1812 @param instances: additional instances from which to obtain parameters
1813 @rtype: list of (origin, hypervisor, parameters)
1814 @return: a list with all parameters found, indicating the hypervisor they
1815 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1820 for hv_name in cluster.enabled_hypervisors:
1821 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1823 for os_name, os_hvp in cluster.os_hvp.items():
1824 for hv_name, hv_params in os_hvp.items():
1826 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1827 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1829 # TODO: collapse identical parameter values in a single one
1830 for instance in instances:
1831 if instance.hvparams:
1832 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1833 cluster.FillHV(instance)))
1838 class _VerifyErrors(object):
1839 """Mix-in for cluster/group verify LUs.
1841 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1842 self.op and self._feedback_fn to be available.)
1846 ETYPE_FIELD = "code"
1847 ETYPE_ERROR = "ERROR"
1848 ETYPE_WARNING = "WARNING"
1850 def _Error(self, ecode, item, msg, *args, **kwargs):
1851 """Format an error message.
1853 Based on the opcode's error_codes parameter, either format a
1854 parseable error code, or a simpler error string.
1856 This must be called only from Exec and functions called from Exec.
1859 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1860 itype, etxt, _ = ecode
1861 # first complete the msg
1864 # then format the whole message
1865 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1866 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1872 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1873 # and finally report it via the feedback_fn
1874 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1876 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1877 """Log an error message if the passed condition is True.
1881 or self.op.debug_simulate_errors) # pylint: disable=E1101
1883 # If the error code is in the list of ignored errors, demote the error to a
1885 (_, etxt, _) = ecode
1886 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1887 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1890 self._Error(ecode, *args, **kwargs)
1892 # do not mark the operation as failed for WARN cases only
1893 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1894 self.bad = self.bad or cond
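# Hedged usage sketch (illustration only): a verify LU mixing in _VerifyErrors
# reports a problem without aborting, e.g. from its Exec:
#
#   self._ErrorIf(node.offline and node.master_candidate,
#                 constants.CV_ENODESETUP, node.name,
#                 "offline node is still marked as a master candidate")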
1897 class LUClusterVerify(NoHooksLU):
1898 """Submits all jobs necessary to verify the cluster.
1903 def ExpandNames(self):
1904 self.needed_locks = {}
1906 def Exec(self, feedback_fn):
1909 if self.op.group_name:
1910 groups = [self.op.group_name]
1911 depends_fn = lambda: None
1913 groups = self.cfg.GetNodeGroupList()
1915 # Verify global configuration
1917 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1920 # Always depend on global verification
1921 depends_fn = lambda: [(-len(jobs), [])]
1923 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1924 ignore_errors=self.op.ignore_errors,
1925 depends=depends_fn())]
1926 for group in groups)
1928 # Fix up all parameters
1929 for op in itertools.chain(*jobs): # pylint: disable=W0142
1930 op.debug_simulate_errors = self.op.debug_simulate_errors
1931 op.verbose = self.op.verbose
1932 op.error_codes = self.op.error_codes
1934 op.skip_checks = self.op.skip_checks
1935 except AttributeError:
1936 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1938 return ResultWithJobs(jobs)
1941 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1942 """Verifies the cluster config.
1947 def _VerifyHVP(self, hvp_data):
1948 """Verifies locally the syntax of the hypervisor parameters.
1951 for item, hv_name, hv_params in hvp_data:
1952 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1955 hv_class = hypervisor.GetHypervisor(hv_name)
1956 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1957 hv_class.CheckParameterSyntax(hv_params)
1958 except errors.GenericError, err:
1959 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1961 def ExpandNames(self):
1962 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1963 self.share_locks = _ShareAll()
1965 def CheckPrereq(self):
1966 """Check prerequisites.
1969 # Retrieve all information
1970 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1971 self.all_node_info = self.cfg.GetAllNodesInfo()
1972 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1974 def Exec(self, feedback_fn):
1975 """Verify integrity of cluster, performing various test on nodes.
1979 self._feedback_fn = feedback_fn
1981 feedback_fn("* Verifying cluster config")
1983 for msg in self.cfg.VerifyConfig():
1984 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1986 feedback_fn("* Verifying cluster certificate files")
1988 for cert_filename in constants.ALL_CERT_FILES:
1989 (errcode, msg) = _VerifyCertificate(cert_filename)
1990 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1992 feedback_fn("* Verifying hypervisor parameters")
1994 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1995 self.all_inst_info.values()))
1997 feedback_fn("* Verifying all nodes belong to an existing group")
1999 # We do this verification here because, should this bogus circumstance
2000 # occur, it would never be caught by VerifyGroup, which only acts on
2001 # nodes/instances reachable from existing node groups.
2003 dangling_nodes = set(node.name for node in self.all_node_info.values()
2004 if node.group not in self.all_group_info)
2006 dangling_instances = {}
2007 no_node_instances = []
2009 for inst in self.all_inst_info.values():
2010 if inst.primary_node in dangling_nodes:
2011 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2012 elif inst.primary_node not in self.all_node_info:
2013 no_node_instances.append(inst.name)
2018 utils.CommaJoin(dangling_instances.get(node.name,
2020 for node in dangling_nodes]
2022 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2024 "the following nodes (and their instances) belong to a non"
2025 " existing group: %s", utils.CommaJoin(pretty_dangling))
2027 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2029 "the following instances have a non-existing primary-node:"
2030 " %s", utils.CommaJoin(no_node_instances))
2035 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2036 """Verifies the status of a node group.
2039 HPATH = "cluster-verify"
2040 HTYPE = constants.HTYPE_CLUSTER
2043 _HOOKS_INDENT_RE = re.compile("^", re.M)
2045 class NodeImage(object):
2046 """A class representing the logical and physical status of a node.
2049 @ivar name: the node name to which this object refers
2050 @ivar volumes: a structure as returned from
2051 L{ganeti.backend.GetVolumeList} (runtime)
2052 @ivar instances: a list of running instances (runtime)
2053 @ivar pinst: list of configured primary instances (config)
2054 @ivar sinst: list of configured secondary instances (config)
2055 @ivar sbp: dictionary of {primary-node: list of instances} for all
2056 instances for which this node is secondary (config)
2057 @ivar mfree: free memory, as reported by hypervisor (runtime)
2058 @ivar dfree: free disk, as reported by the node (runtime)
2059 @ivar offline: the offline status (config)
2060 @type rpc_fail: boolean
2061 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2062 not whether the individual keys were correct) (runtime)
2063 @type lvm_fail: boolean
2064 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2065 @type hyp_fail: boolean
2066 @ivar hyp_fail: whether the RPC call didn't return the instance list
2067 @type ghost: boolean
2068 @ivar ghost: whether this is a known node or not (config)
2069 @type os_fail: boolean
2070 @ivar os_fail: whether the RPC call didn't return valid OS data
2072 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2073 @type vm_capable: boolean
2074 @ivar vm_capable: whether the node can host instances
2077 def __init__(self, offline=False, name=None, vm_capable=True):
2086 self.offline = offline
2087 self.vm_capable = vm_capable
2088 self.rpc_fail = False
2089 self.lvm_fail = False
2090 self.hyp_fail = False
2092 self.os_fail = False
2095 def ExpandNames(self):
2096 # This raises errors.OpPrereqError on its own:
2097 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2099 # Get instances in node group; this is unsafe and needs verification later
2101 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2103 self.needed_locks = {
2104 locking.LEVEL_INSTANCE: inst_names,
2105 locking.LEVEL_NODEGROUP: [self.group_uuid],
2106 locking.LEVEL_NODE: [],
2109 self.share_locks = _ShareAll()
2111 def DeclareLocks(self, level):
2112 if level == locking.LEVEL_NODE:
2113 # Get members of node group; this is unsafe and needs verification later
2114 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2116 all_inst_info = self.cfg.GetAllInstancesInfo()
2118 # In Exec(), we warn about mirrored instances that have primary and
2119 # secondary living in separate node groups. To fully verify that
2120 # volumes for these instances are healthy, we will need to do an
2121 # extra call to their secondaries. We ensure here those nodes will
2123 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2124 # Important: access only the instances whose lock is owned
2125 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2126 nodes.update(all_inst_info[inst].secondary_nodes)
2128 self.needed_locks[locking.LEVEL_NODE] = nodes
2130 def CheckPrereq(self):
2131 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2132 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2134 group_nodes = set(self.group_info.members)
2136 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2139 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2141 unlocked_instances = \
2142 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2145 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2146 utils.CommaJoin(unlocked_nodes),
2149 if unlocked_instances:
2150 raise errors.OpPrereqError("Missing lock for instances: %s" %
2151 utils.CommaJoin(unlocked_instances),
2154 self.all_node_info = self.cfg.GetAllNodesInfo()
2155 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2157 self.my_node_names = utils.NiceSort(group_nodes)
2158 self.my_inst_names = utils.NiceSort(group_instances)
2160 self.my_node_info = dict((name, self.all_node_info[name])
2161 for name in self.my_node_names)
2163 self.my_inst_info = dict((name, self.all_inst_info[name])
2164 for name in self.my_inst_names)
2166 # We detect here the nodes that will need the extra RPC calls for verifying
2167 # split LV volumes; they should be locked.
2168 extra_lv_nodes = set()
2170 for inst in self.my_inst_info.values():
2171 if inst.disk_template in constants.DTS_INT_MIRROR:
2172 for nname in inst.all_nodes:
2173 if self.all_node_info[nname].group != self.group_uuid:
2174 extra_lv_nodes.add(nname)
2176 unlocked_lv_nodes = \
2177 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2179 if unlocked_lv_nodes:
2180 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2181 utils.CommaJoin(unlocked_lv_nodes),
2183 self.extra_lv_nodes = list(extra_lv_nodes)
2185 def _VerifyNode(self, ninfo, nresult):
2186 """Perform some basic validation on data returned from a node.
2188 - check the result data structure is well formed and has all the expected fields
2190 - check ganeti version
2192 @type ninfo: L{objects.Node}
2193 @param ninfo: the node to check
2194 @param nresult: the results from the node
2196 @return: whether overall this call was successful (and we can expect
2197 reasonable values in the response)
2201 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2203 # main result, nresult should be a non-empty dict
2204 test = not nresult or not isinstance(nresult, dict)
2205 _ErrorIf(test, constants.CV_ENODERPC, node,
2206 "unable to verify node: no data returned")
2210 # compares ganeti version
2211 local_version = constants.PROTOCOL_VERSION
2212 remote_version = nresult.get("version", None)
2213 test = not (remote_version and
2214 isinstance(remote_version, (list, tuple)) and
2215 len(remote_version) == 2)
2216 _ErrorIf(test, constants.CV_ENODERPC, node,
2217 "connection to node returned invalid data")
2221 test = local_version != remote_version[0]
2222 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2223 "incompatible protocol versions: master %s,"
2224 " node %s", local_version, remote_version[0])
2228 # node seems compatible, we can actually try to look into its results
2230 # full package version
2231 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2232 constants.CV_ENODEVERSION, node,
2233 "software version mismatch: master %s, node %s",
2234 constants.RELEASE_VERSION, remote_version[1],
2235 code=self.ETYPE_WARNING)
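# For reference: the "version" value reported by a node is expected to be a
# two-element sequence (protocol version, release/software version); a
# protocol mismatch is reported as an error, a release mismatch only as a
# warning (see the two checks above).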
2237 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2238 if ninfo.vm_capable and isinstance(hyp_result, dict):
2239 for hv_name, hv_result in hyp_result.iteritems():
2240 test = hv_result is not None
2241 _ErrorIf(test, constants.CV_ENODEHV, node,
2242 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2244 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2245 if ninfo.vm_capable and isinstance(hvp_result, list):
2246 for item, hv_name, hv_result in hvp_result:
2247 _ErrorIf(True, constants.CV_ENODEHV, node,
2248 "hypervisor %s parameter verify failure (source %s): %s",
2249 hv_name, item, hv_result)
2251 test = nresult.get(constants.NV_NODESETUP,
2252 ["Missing NODESETUP results"])
2253 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2258 def _VerifyNodeTime(self, ninfo, nresult,
2259 nvinfo_starttime, nvinfo_endtime):
2260 """Check the node time.
2262 @type ninfo: L{objects.Node}
2263 @param ninfo: the node to check
2264 @param nresult: the remote results for the node
2265 @param nvinfo_starttime: the start time of the RPC call
2266 @param nvinfo_endtime: the end time of the RPC call
2270 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2272 ntime = nresult.get(constants.NV_TIME, None)
2274 ntime_merged = utils.MergeTime(ntime)
2275 except (ValueError, TypeError):
2276 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2279 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2280 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2281 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2282 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
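# ntime_diff is only set when the node clock falls outside the window
# [nvinfo_starttime - NODE_MAX_CLOCK_SKEW, nvinfo_endtime + NODE_MAX_CLOCK_SKEW];
# it then holds the measured divergence (a lower bound, formatted as a string)
# used in the error report below.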
2286 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2287 "Node time diverges by at least %s from master node time",
2290 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2291 """Check the node LVM results.
2293 @type ninfo: L{objects.Node}
2294 @param ninfo: the node to check
2295 @param nresult: the remote results for the node
2296 @param vg_name: the configured VG name
2303 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2305 # checks vg existence and size > 20G
2306 vglist = nresult.get(constants.NV_VGLIST, None)
2308 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2310 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2311 constants.MIN_VG_SIZE)
2312 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2315 pvlist = nresult.get(constants.NV_PVLIST, None)
2316 test = pvlist is None
2317 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2319 # check that ':' is not present in PV names, since it's a
2320 # special character for lvcreate (denotes the range of PEs to allocate)
2322 for _, pvname, owner_vg in pvlist:
2323 test = ":" in pvname
2324 _ErrorIf(test, constants.CV_ENODELVM, node,
2325 "Invalid character ':' in PV '%s' of VG '%s'",
2328 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2329 """Check the node bridges.
2331 @type ninfo: L{objects.Node}
2332 @param ninfo: the node to check
2333 @param nresult: the remote results for the node
2334 @param bridges: the expected list of bridges
2341 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2343 missing = nresult.get(constants.NV_BRIDGES, None)
2344 test = not isinstance(missing, list)
2345 _ErrorIf(test, constants.CV_ENODENET, node,
2346 "did not return valid bridge information")
2348 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2349 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2351 def _VerifyNodeUserScripts(self, ninfo, nresult):
2352 """Check the results of user scripts presence and executability on the node
2354 @type ninfo: L{objects.Node}
2355 @param ninfo: the node to check
2356 @param nresult: the remote results for the node
2361 test = not constants.NV_USERSCRIPTS in nresult
2362 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2363 "did not return user scripts information")
2365 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2367 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2368 "user scripts not present or not executable: %s" %
2369 utils.CommaJoin(sorted(broken_scripts)))
2371 def _VerifyNodeNetwork(self, ninfo, nresult):
2372 """Check the node network connectivity results.
2374 @type ninfo: L{objects.Node}
2375 @param ninfo: the node to check
2376 @param nresult: the remote results for the node
2380 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2382 test = constants.NV_NODELIST not in nresult
2383 _ErrorIf(test, constants.CV_ENODESSH, node,
2384 "node hasn't returned node ssh connectivity data")
2386 if nresult[constants.NV_NODELIST]:
2387 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2388 _ErrorIf(True, constants.CV_ENODESSH, node,
2389 "ssh communication with node '%s': %s", a_node, a_msg)
2391 test = constants.NV_NODENETTEST not in nresult
2392 _ErrorIf(test, constants.CV_ENODENET, node,
2393 "node hasn't returned node tcp connectivity data")
2395 if nresult[constants.NV_NODENETTEST]:
2396 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2398 _ErrorIf(True, constants.CV_ENODENET, node,
2399 "tcp communication with node '%s': %s",
2400 anode, nresult[constants.NV_NODENETTEST][anode])
2402 test = constants.NV_MASTERIP not in nresult
2403 _ErrorIf(test, constants.CV_ENODENET, node,
2404 "node hasn't returned node master IP reachability data")
2406 if not nresult[constants.NV_MASTERIP]:
2407 if node == self.master_node:
2408 msg = "the master node cannot reach the master IP (not configured?)"
2410 msg = "cannot reach the master IP"
2411 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2413 def _VerifyInstance(self, instance, instanceconfig, node_image,
2415 """Verify an instance.
2417 This function checks to see if the required block devices are
2418 available on the instance's node.
2421 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2422 node_current = instanceconfig.primary_node
2424 node_vol_should = {}
2425 instanceconfig.MapLVsByNode(node_vol_should)
2427 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2428 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2429 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
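# "err" is expected to be a (possibly empty) collection of human-readable
# instance-policy violation messages; an empty result means the instance
# complies with the group's instance policy.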
2431 for node in node_vol_should:
2432 n_img = node_image[node]
2433 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2434 # ignore missing volumes on offline or broken nodes
2436 for volume in node_vol_should[node]:
2437 test = volume not in n_img.volumes
2438 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2439 "volume %s missing on node %s", volume, node)
2441 if instanceconfig.admin_state == constants.ADMINST_UP:
2442 pri_img = node_image[node_current]
2443 test = instance not in pri_img.instances and not pri_img.offline
2444 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2445 "instance not running on its primary node %s",
2448 diskdata = [(nname, success, status, idx)
2449 for (nname, disks) in diskstatus.items()
2450 for idx, (success, status) in enumerate(disks)]
2452 for nname, success, bdev_status, idx in diskdata:
2453 # the 'ghost node' construction in Exec() ensures that we have a
2455 snode = node_image[nname]
2456 bad_snode = snode.ghost or snode.offline
2457 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2458 not success and not bad_snode,
2459 constants.CV_EINSTANCEFAULTYDISK, instance,
2460 "couldn't retrieve status for disk/%s on %s: %s",
2461 idx, nname, bdev_status)
2462 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2463 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2464 constants.CV_EINSTANCEFAULTYDISK, instance,
2465 "disk/%s on %s is faulty", idx, nname)
2467 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2468 """Verify if there are any unknown volumes in the cluster.
2470 The .os, .swap and backup volumes are ignored. All other volumes are
2471 reported as unknown.
2473 @type reserved: L{ganeti.utils.FieldSet}
2474 @param reserved: a FieldSet of reserved volume names
2477 for node, n_img in node_image.items():
2478 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2479 self.all_node_info[node].group != self.group_uuid):
2480 # skip non-healthy nodes
2482 for volume in n_img.volumes:
2483 test = ((node not in node_vol_should or
2484 volume not in node_vol_should[node]) and
2485 not reserved.Matches(volume))
2486 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2487 "volume %s is unknown", volume)
2489 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2490 """Verify N+1 Memory Resilience.
2492 Check that if one single node dies we can still start all the
2493 instances it was primary for.
2496 cluster_info = self.cfg.GetClusterInfo()
2497 for node, n_img in node_image.items():
2498 # This code checks that every node which is now listed as
2499 # secondary has enough memory to host all the instances it is
2500 # supposed to take over, should a single other node in the cluster fail.
2501 # FIXME: not ready for failover to an arbitrary node
2502 # FIXME: does not support file-backed instances
2503 # WARNING: we currently take into account down instances as well
2504 # as up ones, considering that even if they're down someone
2505 # might want to start them even in the event of a node failure.
2506 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2507 # we're skipping nodes marked offline and nodes in other groups from
2508 # the N+1 warning, since most likely we don't have good memory
2509 # information from them; we already list instances living on such
2510 # nodes, and that's enough warning
2512 #TODO(dynmem): also consider ballooning out other instances
2513 for prinode, instances in n_img.sbp.items():
2515 for instance in instances:
2516 bep = cluster_info.FillBE(instance_cfg[instance])
2517 if bep[constants.BE_AUTO_BALANCE]:
2518 needed_mem += bep[constants.BE_MINMEM]
2519 test = n_img.mfree < needed_mem
2520 self._ErrorIf(test, constants.CV_ENODEN1, node,
2521 "not enough memory to accomodate instance failovers"
2522 " should node %s fail (%dMiB needed, %dMiB available)",
2523 prinode, needed_mem, n_img.mfree)
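# In other words: for every prospective failing primary node "prinode", the
# BE_MINMEM values of its auto-balanced instances that have this node as
# secondary are summed up and compared against this node's reported free
# memory (n_img.mfree).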
2526 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2527 (files_all, files_opt, files_mc, files_vm)):
2528 """Verifies file checksums collected from all nodes.
2530 @param errorif: Callback for reporting errors
2531 @param nodeinfo: List of L{objects.Node} objects
2532 @param master_node: Name of master node
2533 @param all_nvinfo: RPC results
2536 # Define functions determining which nodes to consider for a file
2539 (files_mc, lambda node: (node.master_candidate or
2540 node.name == master_node)),
2541 (files_vm, lambda node: node.vm_capable),
2544 # Build mapping from filename to list of nodes which should have the file
2546 for (files, fn) in files2nodefn:
2548 filenodes = nodeinfo
2550 filenodes = filter(fn, nodeinfo)
2551 nodefiles.update((filename,
2552 frozenset(map(operator.attrgetter("name"), filenodes)))
2553 for filename in files)
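# "nodefiles" now maps every expected filename to the frozenset of node names
# that should have it (all nodes when no predicate applies, otherwise only the
# nodes matching the corresponding predicate, e.g. master candidates or
# vm_capable nodes).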
2555 assert set(nodefiles) == (files_all | files_mc | files_vm)
2557 fileinfo = dict((filename, {}) for filename in nodefiles)
2558 ignore_nodes = set()
2560 for node in nodeinfo:
2562 ignore_nodes.add(node.name)
2565 nresult = all_nvinfo[node.name]
2567 if nresult.fail_msg or not nresult.payload:
2570 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2572 test = not (node_files and isinstance(node_files, dict))
2573 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2574 "Node did not return file checksum data")
2576 ignore_nodes.add(node.name)
2579 # Build per-checksum mapping from filename to nodes having it
2580 for (filename, checksum) in node_files.items():
2581 assert filename in nodefiles
2582 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2584 for (filename, checksums) in fileinfo.items():
2585 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2587 # Nodes having the file
2588 with_file = frozenset(node_name
2589 for nodes in fileinfo[filename].values()
2590 for node_name in nodes) - ignore_nodes
2592 expected_nodes = nodefiles[filename] - ignore_nodes
2594 # Nodes missing file
2595 missing_file = expected_nodes - with_file
2597 if filename in files_opt:
2599 errorif(missing_file and missing_file != expected_nodes,
2600 constants.CV_ECLUSTERFILECHECK, None,
2601 "File %s is optional, but it must exist on all or no"
2602 " nodes (not found on %s)",
2603 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2605 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2606 "File %s is missing from node(s) %s", filename,
2607 utils.CommaJoin(utils.NiceSort(missing_file)))
2609 # Warn if a node has a file it shouldn't
2610 unexpected = with_file - expected_nodes
2612 constants.CV_ECLUSTERFILECHECK, None,
2613 "File %s should not exist on node(s) %s",
2614 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2616 # See if there are multiple versions of the file
2617 test = len(checksums) > 1
2619 variants = ["variant %s on %s" %
2620 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2621 for (idx, (checksum, nodes)) in
2622 enumerate(sorted(checksums.items()))]
2626 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2627 "File %s found with %s different checksums (%s)",
2628 filename, len(checksums), "; ".join(variants))
2630 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2632 """Verifies and the node DRBD status.
2634 @type ninfo: L{objects.Node}
2635 @param ninfo: the node to check
2636 @param nresult: the remote results for the node
2637 @param instanceinfo: the dict of instances
2638 @param drbd_helper: the configured DRBD usermode helper
2639 @param drbd_map: the DRBD map as returned by
2640 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2644 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2647 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2648 test = (helper_result is None)
2649 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2650 "no drbd usermode helper returned")
2652 status, payload = helper_result
2654 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2655 "drbd usermode helper check unsuccessful: %s", payload)
2656 test = status and (payload != drbd_helper)
2657 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2658 "wrong drbd usermode helper: %s", payload)
2660 # compute the DRBD minors
2662 for minor, instance in drbd_map[node].items():
2663 test = instance not in instanceinfo
2664 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2665 "ghost instance '%s' in temporary DRBD map", instance)
2666 # ghost instance should not be running, but otherwise we
2667 # don't give double warnings (both ghost instance and
2668 # unallocated minor in use)
2670 node_drbd[minor] = (instance, False)
2672 instance = instanceinfo[instance]
2673 node_drbd[minor] = (instance.name,
2674 instance.admin_state == constants.ADMINST_UP)
2676 # and now check them
2677 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2678 test = not isinstance(used_minors, (tuple, list))
2679 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2680 "cannot parse drbd status file: %s", str(used_minors))
2682 # we cannot check drbd status
2685 for minor, (iname, must_exist) in node_drbd.items():
2686 test = minor not in used_minors and must_exist
2687 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2688 "drbd minor %d of instance %s is not active", minor, iname)
2689 for minor in used_minors:
2690 test = minor not in node_drbd
2691 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2692 "unallocated drbd minor %d is in use", minor)
2694 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2695 """Builds the node OS structures.
2697 @type ninfo: L{objects.Node}
2698 @param ninfo: the node to check
2699 @param nresult: the remote results for the node
2700 @param nimg: the node image object
2704 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2706 remote_os = nresult.get(constants.NV_OSLIST, None)
2707 test = (not isinstance(remote_os, list) or
2708 not compat.all(isinstance(v, list) and len(v) == 7
2709 for v in remote_os))
2711 _ErrorIf(test, constants.CV_ENODEOS, node,
2712 "node hasn't returned valid OS data")
2721 for (name, os_path, status, diagnose,
2722 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2724 if name not in os_dict:
2727 # parameters is a list of lists instead of list of tuples due to
2728 # JSON lacking a real tuple type, fix it:
2729 parameters = [tuple(v) for v in parameters]
2730 os_dict[name].append((os_path, status, diagnose,
2731 set(variants), set(parameters), set(api_ver)))
2733 nimg.oslist = os_dict
2735 def _VerifyNodeOS(self, ninfo, nimg, base):
2736 """Verifies the node OS list.
2738 @type ninfo: L{objects.Node}
2739 @param ninfo: the node to check
2740 @param nimg: the node image object
2741 @param base: the 'template' node we match against (e.g. from the master)
2745 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2747 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2749 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2750 for os_name, os_data in nimg.oslist.items():
2751 assert os_data, "Empty OS status for OS %s?!" % os_name
2752 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2753 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2754 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2755 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2756 "OS '%s' has multiple entries (first one shadows the rest): %s",
2757 os_name, utils.CommaJoin([v[0] for v in os_data]))
2758 # comparisons with the 'base' image
2759 test = os_name not in base.oslist
2760 _ErrorIf(test, constants.CV_ENODEOS, node,
2761 "Extra OS %s not present on reference node (%s)",
2765 assert base.oslist[os_name], "Base node has empty OS status?"
2766 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2768 # base OS is invalid, skipping
2770 for kind, a, b in [("API version", f_api, b_api),
2771 ("variants list", f_var, b_var),
2772 ("parameters", beautify_params(f_param),
2773 beautify_params(b_param))]:
2774 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2775 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2776 kind, os_name, base.name,
2777 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2779 # check any missing OSes
2780 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2781 _ErrorIf(missing, constants.CV_ENODEOS, node,
2782 "OSes present on reference node %s but missing on this node: %s",
2783 base.name, utils.CommaJoin(missing))
2785 def _VerifyOob(self, ninfo, nresult):
2786 """Verifies out of band functionality of a node.
2788 @type ninfo: L{objects.Node}
2789 @param ninfo: the node to check
2790 @param nresult: the remote results for the node
2794 # We just have to verify the paths on master and/or master candidates
2795 # as the oob helper is invoked on the master
2796 if ((ninfo.master_candidate or ninfo.master_capable) and
2797 constants.NV_OOB_PATHS in nresult):
2798 for path_result in nresult[constants.NV_OOB_PATHS]:
2799 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2801 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2802 """Verifies and updates the node volume data.
2804 This function will update a L{NodeImage}'s internal structures
2805 with data from the remote call.
2807 @type ninfo: L{objects.Node}
2808 @param ninfo: the node to check
2809 @param nresult: the remote results for the node
2810 @param nimg: the node image object
2811 @param vg_name: the configured VG name
2815 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2817 nimg.lvm_fail = True
2818 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2821 elif isinstance(lvdata, basestring):
2822 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2823 utils.SafeEncode(lvdata))
2824 elif not isinstance(lvdata, dict):
2825 _ErrorIf(True, constants.CV_ENODELVM, node,
2826 "rpc call to node failed (lvlist)")
2828 nimg.volumes = lvdata
2829 nimg.lvm_fail = False
2831 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2832 """Verifies and updates the node instance list.
2834 If the listing was successful, then updates this node's instance
2835 list. Otherwise, it marks the RPC call as failed for the instance list.
2838 @type ninfo: L{objects.Node}
2839 @param ninfo: the node to check
2840 @param nresult: the remote results for the node
2841 @param nimg: the node image object
2844 idata = nresult.get(constants.NV_INSTANCELIST, None)
2845 test = not isinstance(idata, list)
2846 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2847 "rpc call to node failed (instancelist): %s",
2848 utils.SafeEncode(str(idata)))
2850 nimg.hyp_fail = True
2852 nimg.instances = idata
2854 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2855 """Verifies and computes a node information map
2857 @type ninfo: L{objects.Node}
2858 @param ninfo: the node to check
2859 @param nresult: the remote results for the node
2860 @param nimg: the node image object
2861 @param vg_name: the configured VG name
2865 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2867 # try to read free memory (from the hypervisor)
2868 hv_info = nresult.get(constants.NV_HVINFO, None)
2869 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2870 _ErrorIf(test, constants.CV_ENODEHV, node,
2871 "rpc call to node failed (hvinfo)")
2874 nimg.mfree = int(hv_info["memory_free"])
2875 except (ValueError, TypeError):
2876 _ErrorIf(True, constants.CV_ENODERPC, node,
2877 "node returned invalid nodeinfo, check hypervisor")
2879 # FIXME: devise a free space model for file based instances as well
2880 if vg_name is not None:
2881 test = (constants.NV_VGLIST not in nresult or
2882 vg_name not in nresult[constants.NV_VGLIST])
2883 _ErrorIf(test, constants.CV_ENODELVM, node,
2884 "node didn't return data for the volume group '%s'"
2885 " - it is either missing or broken", vg_name)
2888 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2889 except (ValueError, TypeError):
2890 _ErrorIf(True, constants.CV_ENODERPC, node,
2891 "node returned invalid LVM info, check LVM status")
2893 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2894 """Gets per-disk status information for all instances.
2896 @type nodelist: list of strings
2897 @param nodelist: Node names
2898 @type node_image: dict of (name, L{objects.Node})
2899 @param node_image: Node objects
2900 @type instanceinfo: dict of (name, L{objects.Instance})
2901 @param instanceinfo: Instance objects
2902 @rtype: {instance: {node: [(success, payload)]}}
2903 @return: a dictionary of per-instance dictionaries with nodes as
2904 keys and disk information as values; the disk information is a
2905 list of tuples (success, payload)
2908 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2911 node_disks_devonly = {}
2912 diskless_instances = set()
2913 diskless = constants.DT_DISKLESS
2915 for nname in nodelist:
2916 node_instances = list(itertools.chain(node_image[nname].pinst,
2917 node_image[nname].sinst))
2918 diskless_instances.update(inst for inst in node_instances
2919 if instanceinfo[inst].disk_template == diskless)
2920 disks = [(inst, disk)
2921 for inst in node_instances
2922 for disk in instanceinfo[inst].disks]
2925 # No need to collect data
2928 node_disks[nname] = disks
2930 # _AnnotateDiskParams makes already copies of the disks
2932 for (inst, dev) in disks:
2933 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2934 self.cfg.SetDiskID(anno_disk, nname)
2935 devonly.append(anno_disk)
2937 node_disks_devonly[nname] = devonly
2939 assert len(node_disks) == len(node_disks_devonly)
2941 # Collect data from all nodes with disks
2942 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2945 assert len(result) == len(node_disks)
2949 for (nname, nres) in result.items():
2950 disks = node_disks[nname]
2953 # No data from this node
2954 data = len(disks) * [(False, "node offline")]
2957 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2958 "while getting disk information: %s", msg)
2960 # No data from this node
2961 data = len(disks) * [(False, msg)]
2964 for idx, i in enumerate(nres.payload):
2965 if isinstance(i, (tuple, list)) and len(i) == 2:
2968 logging.warning("Invalid result from node %s, entry %d: %s",
2970 data.append((False, "Invalid result from the remote node"))
2972 for ((inst, _), status) in zip(disks, data):
2973 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2975 # Add empty entries for diskless instances.
2976 for inst in diskless_instances:
2977 assert inst not in instdisk
2980 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2981 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2982 compat.all(isinstance(s, (tuple, list)) and
2983 len(s) == 2 for s in statuses)
2984 for inst, nnames in instdisk.items()
2985 for nname, statuses in nnames.items())
2986 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2991 def _SshNodeSelector(group_uuid, all_nodes):
2992 """Create endless iterators for all potential SSH check hosts.
2995 nodes = [node for node in all_nodes
2996 if (node.group != group_uuid and
2998 keyfunc = operator.attrgetter("group")
3000 return map(itertools.cycle,
3001 [sorted(map(operator.attrgetter("name"), names))
3002 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3006 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3007 """Choose which nodes should talk to which other nodes.
3009 We will make nodes contact all nodes in their group, and one node from every other group.
3012 @warning: This algorithm has a known issue if one node group is much
3013 smaller than others (e.g. just one node). In such a case all other
3014 nodes will talk to the single node.
3017 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3018 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3020 return (online_nodes,
3021 dict((name, sorted([i.next() for i in sel]))
3022 for name in online_nodes))
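# For illustration: _SshNodeSelector builds one endless (cycled) iterator per
# foreign node group, and each online node of the verified group draws one
# target name from every such iterator, so cross-group SSH checks are spread
# round-robin over this group's nodes.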
3024 def BuildHooksEnv(self):
3027 Cluster-Verify hooks run only in the post phase; their failure is logged in
3028 the verify output and makes the verification fail.
3032 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3035 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3036 for node in self.my_node_info.values())
3040 def BuildHooksNodes(self):
3041 """Build hooks nodes.
3044 return ([], self.my_node_names)
3046 def Exec(self, feedback_fn):
3047 """Verify integrity of the node group, performing various test on nodes.
3050 # This method has too many local variables. pylint: disable=R0914
3051 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3053 if not self.my_node_names:
3055 feedback_fn("* Empty node group, skipping verification")
3059 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3060 verbose = self.op.verbose
3061 self._feedback_fn = feedback_fn
3063 vg_name = self.cfg.GetVGName()
3064 drbd_helper = self.cfg.GetDRBDHelper()
3065 cluster = self.cfg.GetClusterInfo()
3066 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3067 hypervisors = cluster.enabled_hypervisors
3068 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3070 i_non_redundant = [] # Non redundant instances
3071 i_non_a_balanced = [] # Non auto-balanced instances
3072 i_offline = 0 # Count of offline instances
3073 n_offline = 0 # Count of offline nodes
3074 n_drained = 0 # Count of nodes being drained
3075 node_vol_should = {}
3077 # FIXME: verify OS list
3080 filemap = _ComputeAncillaryFiles(cluster, False)
3082 # do local checksums
3083 master_node = self.master_node = self.cfg.GetMasterNode()
3084 master_ip = self.cfg.GetMasterIP()
3086 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3089 if self.cfg.GetUseExternalMipScript():
3090 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3092 node_verify_param = {
3093 constants.NV_FILELIST:
3094 utils.UniqueSequence(filename
3095 for files in filemap
3096 for filename in files),
3097 constants.NV_NODELIST:
3098 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3099 self.all_node_info.values()),
3100 constants.NV_HYPERVISOR: hypervisors,
3101 constants.NV_HVPARAMS:
3102 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3103 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3104 for node in node_data_list
3105 if not node.offline],
3106 constants.NV_INSTANCELIST: hypervisors,
3107 constants.NV_VERSION: None,
3108 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3109 constants.NV_NODESETUP: None,
3110 constants.NV_TIME: None,
3111 constants.NV_MASTERIP: (master_node, master_ip),
3112 constants.NV_OSLIST: None,
3113 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3114 constants.NV_USERSCRIPTS: user_scripts,
3117 if vg_name is not None:
3118 node_verify_param[constants.NV_VGLIST] = None
3119 node_verify_param[constants.NV_LVLIST] = vg_name
3120 node_verify_param[constants.NV_PVLIST] = [vg_name]
3121 node_verify_param[constants.NV_DRBDLIST] = None
3124 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3127 # FIXME: this needs to be changed per node-group, not cluster-wide
3129 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3130 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3131 bridges.add(default_nicpp[constants.NIC_LINK])
3132 for instance in self.my_inst_info.values():
3133 for nic in instance.nics:
3134 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3135 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3136 bridges.add(full_nic[constants.NIC_LINK])
3139 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3141 # Build our expected cluster state
3142 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3144 vm_capable=node.vm_capable))
3145 for node in node_data_list)
3149 for node in self.all_node_info.values():
3150 path = _SupportsOob(self.cfg, node)
3151 if path and path not in oob_paths:
3152 oob_paths.append(path)
3155 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3157 for instance in self.my_inst_names:
3158 inst_config = self.my_inst_info[instance]
3159 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3162 for nname in inst_config.all_nodes:
3163 if nname not in node_image:
3164 gnode = self.NodeImage(name=nname)
3165 gnode.ghost = (nname not in self.all_node_info)
3166 node_image[nname] = gnode
3168 inst_config.MapLVsByNode(node_vol_should)
3170 pnode = inst_config.primary_node
3171 node_image[pnode].pinst.append(instance)
3173 for snode in inst_config.secondary_nodes:
3174 nimg = node_image[snode]
3175 nimg.sinst.append(instance)
3176 if pnode not in nimg.sbp:
3177 nimg.sbp[pnode] = []
3178 nimg.sbp[pnode].append(instance)
3180 # At this point, we have the in-memory data structures complete,
3181 # except for the runtime information, which we'll gather next
3183 # Due to the way our RPC system works, exact response times cannot be
3184 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3185 # time before and after executing the request, we can at least have a time window.
3187 nvinfo_starttime = time.time()
3188 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3190 self.cfg.GetClusterName())
3191 nvinfo_endtime = time.time()
3193 if self.extra_lv_nodes and vg_name is not None:
3195 self.rpc.call_node_verify(self.extra_lv_nodes,
3196 {constants.NV_LVLIST: vg_name},
3197 self.cfg.GetClusterName())
3199 extra_lv_nvinfo = {}
3201 all_drbd_map = self.cfg.ComputeDRBDMap()
3203 feedback_fn("* Gathering disk information (%s nodes)" %
3204 len(self.my_node_names))
3205 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3208 feedback_fn("* Verifying configuration file consistency")
3210 # If not all nodes are being checked, we need to make sure the master node
3211 # and a non-checked vm_capable node are in the list.
3212 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3214 vf_nvinfo = all_nvinfo.copy()
3215 vf_node_info = list(self.my_node_info.values())
3216 additional_nodes = []
3217 if master_node not in self.my_node_info:
3218 additional_nodes.append(master_node)
3219 vf_node_info.append(self.all_node_info[master_node])
3220 # Add the first vm_capable node we find which is not included
3221 for node in absent_nodes:
3222 nodeinfo = self.all_node_info[node]
3223 if nodeinfo.vm_capable and not nodeinfo.offline:
3224 additional_nodes.append(node)
3225 vf_node_info.append(self.all_node_info[node])
3227 key = constants.NV_FILELIST
3228 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3229 {key: node_verify_param[key]},
3230 self.cfg.GetClusterName()))
3232 vf_nvinfo = all_nvinfo
3233 vf_node_info = self.my_node_info.values()
3235 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3237 feedback_fn("* Verifying node status")
3241 for node_i in node_data_list:
3243 nimg = node_image[node]
3247 feedback_fn("* Skipping offline node %s" % (node,))
3251 if node == master_node:
3253 elif node_i.master_candidate:
3254 ntype = "master candidate"
3255 elif node_i.drained:
3261 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3263 msg = all_nvinfo[node].fail_msg
3264 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3267 nimg.rpc_fail = True
3270 nresult = all_nvinfo[node].payload
3272 nimg.call_ok = self._VerifyNode(node_i, nresult)
3273 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3274 self._VerifyNodeNetwork(node_i, nresult)
3275 self._VerifyNodeUserScripts(node_i, nresult)
3276 self._VerifyOob(node_i, nresult)
3279 self._VerifyNodeLVM(node_i, nresult, vg_name)
3280 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3283 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3284 self._UpdateNodeInstances(node_i, nresult, nimg)
3285 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3286 self._UpdateNodeOS(node_i, nresult, nimg)
3288 if not nimg.os_fail:
3289 if refos_img is None:
3291 self._VerifyNodeOS(node_i, nimg, refos_img)
3292 self._VerifyNodeBridges(node_i, nresult, bridges)
3294 # Check whether all running instances are primary for the node. (This
3295 # can no longer be done from _VerifyInstance below, since some of the
3296 # wrong instances could be from other node groups.)
3297 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3299 for inst in non_primary_inst:
3300 test = inst in self.all_inst_info
3301 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3302 "instance should not run on node %s", node_i.name)
3303 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3304 "node is running unknown instance %s", inst)
3306 for node, result in extra_lv_nvinfo.items():
3307 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3308 node_image[node], vg_name)
3310 feedback_fn("* Verifying instance status")
3311 for instance in self.my_inst_names:
3313 feedback_fn("* Verifying instance %s" % instance)
3314 inst_config = self.my_inst_info[instance]
3315 self._VerifyInstance(instance, inst_config, node_image,
3317 inst_nodes_offline = []
3319 pnode = inst_config.primary_node
3320 pnode_img = node_image[pnode]
3321 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3322 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3323 " primary node failed", instance)
3325 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3327 constants.CV_EINSTANCEBADNODE, instance,
3328 "instance is marked as running and lives on offline node %s",
3329 inst_config.primary_node)
3331 # If the instance is non-redundant we cannot survive losing its primary
3332 # node, so we are not N+1 compliant. On the other hand we have no disk
3333 # templates with more than one secondary, so that situation is not well handled either.
3335 # FIXME: does not support file-backed instances
3336 if not inst_config.secondary_nodes:
3337 i_non_redundant.append(instance)
3339 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3340 constants.CV_EINSTANCELAYOUT,
3341 instance, "instance has multiple secondary nodes: %s",
3342 utils.CommaJoin(inst_config.secondary_nodes),
3343 code=self.ETYPE_WARNING)
3345 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3346 pnode = inst_config.primary_node
3347 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3348 instance_groups = {}
3350 for node in instance_nodes:
3351 instance_groups.setdefault(self.all_node_info[node].group,
3355 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3356 # Sort so that we always list the primary node first.
3357 for group, nodes in sorted(instance_groups.items(),
3358 key=lambda (_, nodes): pnode in nodes,
3361 self._ErrorIf(len(instance_groups) > 1,
3362 constants.CV_EINSTANCESPLITGROUPS,
3363 instance, "instance has primary and secondary nodes in"
3364 " different groups: %s", utils.CommaJoin(pretty_list),
3365 code=self.ETYPE_WARNING)
3367 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3368 i_non_a_balanced.append(instance)
3370 for snode in inst_config.secondary_nodes:
3371 s_img = node_image[snode]
3372 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3373 snode, "instance %s, connection to secondary node failed",
3377 inst_nodes_offline.append(snode)
3379 # warn that the instance lives on offline nodes
3380 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3381 "instance has offline secondary node(s) %s",
3382 utils.CommaJoin(inst_nodes_offline))
3383 # ... or ghost/non-vm_capable nodes
3384 for node in inst_config.all_nodes:
3385 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3386 instance, "instance lives on ghost node %s", node)
3387 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3388 instance, "instance lives on non-vm_capable node %s", node)
3390 feedback_fn("* Verifying orphan volumes")
3391 reserved = utils.FieldSet(*cluster.reserved_lvs)
3393 # We will get spurious "unknown volume" warnings if any node of this group
3394 # is secondary for an instance whose primary is in another group. To avoid
3395 # them, we find these instances and add their volumes to node_vol_should.
3396 for inst in self.all_inst_info.values():
3397 for secondary in inst.secondary_nodes:
3398 if (secondary in self.my_node_info
3399 and inst.name not in self.my_inst_info):
3400 inst.MapLVsByNode(node_vol_should)
3403 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3405 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3406 feedback_fn("* Verifying N+1 Memory redundancy")
3407 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3409 feedback_fn("* Other Notes")
3411 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3412 % len(i_non_redundant))
3414 if i_non_a_balanced:
3415 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3416 % len(i_non_a_balanced))
3419 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3422 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3425 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3429 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3430 """Analyze the post-hooks' result
3432 This method analyses the hook result, handles it, and sends some
3433 nicely-formatted feedback back to the user.
3435 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3436 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3437 @param hooks_results: the results of the multi-node hooks rpc call
3438 @param feedback_fn: function used to send feedback back to the caller
3439 @param lu_result: previous Exec result
3440 @return: the new Exec result, based on the previous result
3444 # We only really run POST phase hooks, only for non-empty groups,
3445 # and are only interested in their results
3446 if not self.my_node_names:
3449 elif phase == constants.HOOKS_PHASE_POST:
3450 # Used to change hooks' output to proper indentation
3451 feedback_fn("* Hooks Results")
3452 assert hooks_results, "invalid result from hooks"
3454 for node_name in hooks_results:
3455 res = hooks_results[node_name]
3457 test = msg and not res.offline
3458 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3459 "Communication failure in hooks execution: %s", msg)
3460 if res.offline or msg:
3461 # No need to investigate payload if node is offline or gave an error
3464 for script, hkr, output in res.payload:
3465 test = hkr == constants.HKR_FAIL
3466 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3467 "Script %s failed, output:", script)
3469 output = self._HOOKS_INDENT_RE.sub(" ", output)
3470 feedback_fn("%s" % output)
3476 class LUClusterVerifyDisks(NoHooksLU):
3477 """Verifies the cluster disks status.
3482 def ExpandNames(self):
3483 self.share_locks = _ShareAll()
3484 self.needed_locks = {
3485 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3488 def Exec(self, feedback_fn):
3489 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3491 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3492 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3493 for group in group_names])
3496 class LUGroupVerifyDisks(NoHooksLU):
3497 """Verifies the status of all disks in a node group.
3502 def ExpandNames(self):
3503 # Raises errors.OpPrereqError on its own if group can't be found
3504 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3506 self.share_locks = _ShareAll()
3507 self.needed_locks = {
3508 locking.LEVEL_INSTANCE: [],
3509 locking.LEVEL_NODEGROUP: [],
3510 locking.LEVEL_NODE: [],
3513 def DeclareLocks(self, level):
3514 if level == locking.LEVEL_INSTANCE:
3515 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3517 # Lock instances optimistically, needs verification once node and group
3518 # locks have been acquired
3519 self.needed_locks[locking.LEVEL_INSTANCE] = \
3520 self.cfg.GetNodeGroupInstances(self.group_uuid)
3522 elif level == locking.LEVEL_NODEGROUP:
3523 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3525 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3526 set([self.group_uuid] +
3527 # Lock all groups used by instances optimistically; this requires
3528 # going via the node before it's locked, requiring verification
3531 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3532 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3534 elif level == locking.LEVEL_NODE:
3535 # This will only lock the nodes in the group to be verified which contain
3537 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3538 self._LockInstancesNodes()
3540 # Lock all nodes in group to be verified
3541 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3542 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3543 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3545 def CheckPrereq(self):
3546 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3547 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3548 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3550 assert self.group_uuid in owned_groups
3552 # Check if locked instances are still correct
3553 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3555 # Get instance information
3556 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3558 # Check if node groups for locked instances are still correct
3559 _CheckInstancesNodeGroups(self.cfg, self.instances,
3560 owned_groups, owned_nodes, self.group_uuid)
3562 def Exec(self, feedback_fn):
3563 """Verify integrity of cluster disks.
3565 @rtype: tuple of three items
3566 @return: a tuple of (dict of node-to-node_error, list of instances
3567 which need activate-disks, dict of instance: (node, volume) for
3572 res_instances = set()
3575 nv_dict = _MapInstanceDisksToNodes([inst
3576 for inst in self.instances.values()
3577 if inst.admin_state == constants.ADMINST_UP])
3580 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3581 set(self.cfg.GetVmCapableNodeList()))
3583 node_lvs = self.rpc.call_lv_list(nodes, [])
3585 for (node, node_res) in node_lvs.items():
3586 if node_res.offline:
3589 msg = node_res.fail_msg
3591 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3592 res_nodes[node] = msg
3595 for lv_name, (_, _, lv_online) in node_res.payload.items():
3596 inst = nv_dict.pop((node, lv_name), None)
3597 if not (lv_online or inst is None):
3598 res_instances.add(inst)
3600 # any leftover items in nv_dict are missing LVs, let's arrange the data
3602 for key, inst in nv_dict.iteritems():
3603 res_missing.setdefault(inst, []).append(list(key))
3605 return (res_nodes, list(res_instances), res_missing)
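# Shape of the returned value, with made-up names for illustration:
#   ({"node2": "rpc error"}, ["inst-needing-activate-disks"],
#    {"inst-with-missing-lv": [["node3", "xenvg/lv_name"]]})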
3608 class LUClusterRepairDiskSizes(NoHooksLU):
3609 """Verifies the cluster disks sizes.
3614 def ExpandNames(self):
3615 if self.op.instances:
3616 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3617 self.needed_locks = {
3618 locking.LEVEL_NODE_RES: [],
3619 locking.LEVEL_INSTANCE: self.wanted_names,
3621 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3623 self.wanted_names = None
3624 self.needed_locks = {
3625 locking.LEVEL_NODE_RES: locking.ALL_SET,
3626 locking.LEVEL_INSTANCE: locking.ALL_SET,
3628 self.share_locks = {
3629 locking.LEVEL_NODE_RES: 1,
3630 locking.LEVEL_INSTANCE: 0,
3633 def DeclareLocks(self, level):
3634 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3635 self._LockInstancesNodes(primary_only=True, level=level)
3637 def CheckPrereq(self):
3638 """Check prerequisites.
3640 This only checks the optional instance list against the existing names.
3643 if self.wanted_names is None:
3644 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3646 self.wanted_instances = \
3647 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3649 def _EnsureChildSizes(self, disk):
3650 """Ensure children of the disk have the needed disk size.
3652 This is valid mainly for DRBD8 and fixes an issue where the
3653 children have smaller disk size.
3655 @param disk: an L{ganeti.objects.Disk} object
3658 if disk.dev_type == constants.LD_DRBD8:
3659 assert disk.children, "Empty children for DRBD8?"
3660 fchild = disk.children[0]
3661 mismatch = fchild.size < disk.size
3663 self.LogInfo("Child disk has size %d, parent %d, fixing",
3664 fchild.size, disk.size)
3665 fchild.size = disk.size
3667 # and we recurse on this child only, not on the metadev
3668 return self._EnsureChildSizes(fchild) or mismatch
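# The boolean result tells the caller whether any child size had to be
# corrected, i.e. whether the configuration entry for this disk must be
# written back (see the use in Exec below).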
3672 def Exec(self, feedback_fn):
3673 """Verify the size of cluster disks.
3676 # TODO: check child disks too
3677 # TODO: check differences in size between primary/secondary nodes
3679 for instance in self.wanted_instances:
3680 pnode = instance.primary_node
3681 if pnode not in per_node_disks:
3682 per_node_disks[pnode] = []
3683 for idx, disk in enumerate(instance.disks):
3684 per_node_disks[pnode].append((instance, idx, disk))
3686 assert not (frozenset(per_node_disks.keys()) -
3687 self.owned_locks(locking.LEVEL_NODE_RES)), \
3688 "Not owning correct locks"
3689 assert not self.owned_locks(locking.LEVEL_NODE)
3692 for node, dskl in per_node_disks.items():
3693 newl = [v[2].Copy() for v in dskl]
3695 self.cfg.SetDiskID(dsk, node)
3696 result = self.rpc.call_blockdev_getsize(node, newl)
3698 self.LogWarning("Failure in blockdev_getsize call to node"
3699 " %s, ignoring", node)
3701 if len(result.payload) != len(dskl):
3702 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3703 " result.payload=%s", node, len(dskl), result.payload)
3704 self.LogWarning("Invalid result from node %s, ignoring node results",
3707 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3709 self.LogWarning("Disk %d of instance %s did not return size"
3710 " information, ignoring", idx, instance.name)
3712 if not isinstance(size, (int, long)):
3713 self.LogWarning("Disk %d of instance %s did not return valid"
3714 " size information, ignoring", idx, instance.name)
3717 if size != disk.size:
3718 self.LogInfo("Disk %d of instance %s has mismatched size,"
3719 " correcting: recorded %d, actual %d", idx,
3720 instance.name, disk.size, size)
3722 self.cfg.Update(instance, feedback_fn)
3723 changed.append((instance.name, idx, size))
3724 if self._EnsureChildSizes(disk):
3725 self.cfg.Update(instance, feedback_fn)
3726 changed.append((instance.name, idx, disk.size))
3730 class LUClusterRename(LogicalUnit):
3731 """Rename the cluster.
3734 HPATH = "cluster-rename"
3735 HTYPE = constants.HTYPE_CLUSTER
3737 def BuildHooksEnv(self):
3742 "OP_TARGET": self.cfg.GetClusterName(),
3743 "NEW_NAME": self.op.name,
3746 def BuildHooksNodes(self):
3747 """Build hooks nodes.
3750 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3752 def CheckPrereq(self):
3753 """Verify that the passed name is a valid one.
3756 hostname = netutils.GetHostname(name=self.op.name,
3757 family=self.cfg.GetPrimaryIPFamily())
3759 new_name = hostname.name
3760 self.ip = new_ip = hostname.ip
3761 old_name = self.cfg.GetClusterName()
3762 old_ip = self.cfg.GetMasterIP()
3763 if new_name == old_name and new_ip == old_ip:
3764 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3765 " cluster has changed",
3767 if new_ip != old_ip:
3768 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3769 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3770 " reachable on the network" %
3771 new_ip, errors.ECODE_NOTUNIQUE)
3773 self.op.name = new_name
3775 def Exec(self, feedback_fn):
3776 """Rename the cluster.
3779 clustername = self.op.name
3782 # shutdown the master IP
3783 master_params = self.cfg.GetMasterNetworkParameters()
3784 ems = self.cfg.GetUseExternalMipScript()
3785 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3787 result.Raise("Could not disable the master role")
3790 cluster = self.cfg.GetClusterInfo()
3791 cluster.cluster_name = clustername
3792 cluster.master_ip = new_ip
3793 self.cfg.Update(cluster, feedback_fn)
3795 # update the known hosts file
3796 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3797 node_list = self.cfg.GetOnlineNodeList()
3799 node_list.remove(master_params.name)
3802 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3804 master_params.ip = new_ip
3805 result = self.rpc.call_node_activate_master_ip(master_params.name,
3807 msg = result.fail_msg
3809 self.LogWarning("Could not re-enable the master role on"
3810 " the master, please restart manually: %s", msg)
3815 def _ValidateNetmask(cfg, netmask):
3816 """Checks if a netmask is valid.
3818 @type cfg: L{config.ConfigWriter}
3819 @param cfg: The cluster configuration
3821 @param netmask: the netmask to be verified
3822 @raise errors.OpPrereqError: if the validation fails
3825 ip_family = cfg.GetPrimaryIPFamily()
3827 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3828 except errors.ProgrammerError:
3829 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3831 if not ipcls.ValidateNetmask(netmask):
3832 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3836 class LUClusterSetParams(LogicalUnit):
3837 """Change the parameters of the cluster.
3840 HPATH = "cluster-modify"
3841 HTYPE = constants.HTYPE_CLUSTER
3844 def CheckArguments(self):
3848 if self.op.uid_pool:
3849 uidpool.CheckUidPool(self.op.uid_pool)
3851 if self.op.add_uids:
3852 uidpool.CheckUidPool(self.op.add_uids)
3854 if self.op.remove_uids:
3855 uidpool.CheckUidPool(self.op.remove_uids)
3857 if self.op.master_netmask is not None:
3858 _ValidateNetmask(self.cfg, self.op.master_netmask)
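# Illustrative example (assuming an IPv4 cluster): the netmask is given as a prefix
# length, so a value such as 24 passes _ValidateNetmask above, while 99 is rejected
# with an OpPrereqError.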
3860 if self.op.diskparams:
3861 for dt_params in self.op.diskparams.values():
3862 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
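# self.op.diskparams is a dict of per-template dicts, e.g. (illustrative values):
# {"drbd": {"resync-rate": 1024}}; the types of the inner values are enforced above
# and the option names are checked against the known defaults below.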
3864 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3865 except errors.OpPrereqError, err:
3866         raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3869 def ExpandNames(self):
3870 # FIXME: in the future maybe other cluster params won't require checking on
3871 # all nodes to be modified.
3872 self.needed_locks = {
3873 locking.LEVEL_NODE: locking.ALL_SET,
3874 locking.LEVEL_INSTANCE: locking.ALL_SET,
3875 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3877 self.share_locks = {
3878 locking.LEVEL_NODE: 1,
3879 locking.LEVEL_INSTANCE: 1,
3880 locking.LEVEL_NODEGROUP: 1,
3883 def BuildHooksEnv(self):
3888 "OP_TARGET": self.cfg.GetClusterName(),
3889 "NEW_VG_NAME": self.op.vg_name,
3892 def BuildHooksNodes(self):
3893 """Build hooks nodes.
3896 mn = self.cfg.GetMasterNode()
3899 def CheckPrereq(self):
3900 """Check prerequisites.
3902     This checks that the given parameters do not conflict and
3903     that the given volume group is valid.
3906 if self.op.vg_name is not None and not self.op.vg_name:
3907 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3908 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3909 " instances exist", errors.ECODE_INVAL)
3911 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3912 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3913 raise errors.OpPrereqError("Cannot disable drbd helper while"
3914 " drbd-based instances exist",
3917 node_list = self.owned_locks(locking.LEVEL_NODE)
3919 # if vg_name not None, checks given volume group on all nodes
3921 vglist = self.rpc.call_vg_list(node_list)
3922 for node in node_list:
3923 msg = vglist[node].fail_msg
3925 # ignoring down node
3926 self.LogWarning("Error while gathering data on node %s"
3927 " (ignoring node): %s", node, msg)
3929 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3931 constants.MIN_VG_SIZE)
3933 raise errors.OpPrereqError("Error on node '%s': %s" %
3934 (node, vgstatus), errors.ECODE_ENVIRON)
3936 if self.op.drbd_helper:
3937 # checks given drbd helper on all nodes
3938 helpers = self.rpc.call_drbd_helper(node_list)
3939 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3941 self.LogInfo("Not checking drbd helper on offline node %s", node)
3943 msg = helpers[node].fail_msg
3945 raise errors.OpPrereqError("Error checking drbd helper on node"
3946 " '%s': %s" % (node, msg),
3947 errors.ECODE_ENVIRON)
3948 node_helper = helpers[node].payload
3949 if node_helper != self.op.drbd_helper:
3950 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3951 (node, node_helper), errors.ECODE_ENVIRON)
3953 self.cluster = cluster = self.cfg.GetClusterInfo()
3954 # validate params changes
3955 if self.op.beparams:
3956 objects.UpgradeBeParams(self.op.beparams)
3957 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3958 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3960 if self.op.ndparams:
3961 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3962 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3964 # TODO: we need a more general way to handle resetting
3965 # cluster-level parameters to default values
3966 if self.new_ndparams["oob_program"] == "":
3967 self.new_ndparams["oob_program"] = \
3968 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3970 if self.op.hv_state:
3971 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3972 self.cluster.hv_state_static)
3973 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3974 for hv, values in new_hv_state.items())
3976 if self.op.disk_state:
3977 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3978 self.cluster.disk_state_static)
3979 self.new_disk_state = \
3980 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3981 for name, values in svalues.items()))
3982 for storage, svalues in new_disk_state.items())
3985 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3988 all_instances = self.cfg.GetAllInstancesInfo().values()
3990 for group in self.cfg.GetAllNodeGroupsInfo().values():
3991 instances = frozenset([inst for inst in all_instances
3992 if compat.any(node in group.members
3993 for node in inst.all_nodes)])
3994 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3995 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3997 new_ipolicy, instances)
3999 violations.update(new)
4002 self.LogWarning("After the ipolicy change the following instances"
4003 " violate them: %s",
4004 utils.CommaJoin(utils.NiceSort(violations)))
4006 if self.op.nicparams:
4007 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4008 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4009 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4012 # check all instances for consistency
4013 for instance in self.cfg.GetAllInstancesInfo().values():
4014 for nic_idx, nic in enumerate(instance.nics):
4015 params_copy = copy.deepcopy(nic.nicparams)
4016 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4018 # check parameter syntax
4020 objects.NIC.CheckParameterSyntax(params_filled)
4021 except errors.ConfigurationError, err:
4022 nic_errors.append("Instance %s, nic/%d: %s" %
4023 (instance.name, nic_idx, err))
4025 # if we're moving instances to routed, check that they have an ip
4026 target_mode = params_filled[constants.NIC_MODE]
4027 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4028 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4029 " address" % (instance.name, nic_idx))
4031 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4032 "\n".join(nic_errors))
4034 # hypervisor list/parameters
4035 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4036 if self.op.hvparams:
4037 for hv_name, hv_dict in self.op.hvparams.items():
4038 if hv_name not in self.new_hvparams:
4039 self.new_hvparams[hv_name] = hv_dict
4041 self.new_hvparams[hv_name].update(hv_dict)
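# Merge semantics above (illustrative values): with cluster.hvparams containing
# {"xen-pvm": {"kernel_path": "/boot/vmlinuz-3-xenU"}} and self.op.hvparams set to
# {"xen-pvm": {"root_path": "/dev/xvda1"}}, the "xen-pvm" entry keeps kernel_path
# and additionally gains root_path.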
4043 # disk template parameters
4044 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4045 if self.op.diskparams:
4046 for dt_name, dt_params in self.op.diskparams.items():
4047         if dt_name not in self.new_diskparams:
4048 self.new_diskparams[dt_name] = dt_params
4050 self.new_diskparams[dt_name].update(dt_params)
4052 # os hypervisor parameters
4053 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4055 for os_name, hvs in self.op.os_hvp.items():
4056 if os_name not in self.new_os_hvp:
4057 self.new_os_hvp[os_name] = hvs
4059 for hv_name, hv_dict in hvs.items():
4060 if hv_name not in self.new_os_hvp[os_name]:
4061 self.new_os_hvp[os_name][hv_name] = hv_dict
4063 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4066 self.new_osp = objects.FillDict(cluster.osparams, {})
4067 if self.op.osparams:
4068 for os_name, osp in self.op.osparams.items():
4069 if os_name not in self.new_osp:
4070 self.new_osp[os_name] = {}
4072 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4075 if not self.new_osp[os_name]:
4076 # we removed all parameters
4077 del self.new_osp[os_name]
4079 # check the parameter validity (remote check)
4080 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4081 os_name, self.new_osp[os_name])
4083 # changes to the hypervisor list
4084 if self.op.enabled_hypervisors is not None:
4085 self.hv_list = self.op.enabled_hypervisors
4086 for hv in self.hv_list:
4087 # if the hypervisor doesn't already exist in the cluster
4088 # hvparams, we initialize it to empty, and then (in both
4089 # cases) we make sure to fill the defaults, as we might not
4090         # have a complete defaults list if the hypervisor wasn't enabled before
4092 if hv not in new_hvp:
4094 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4095 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4097 self.hv_list = cluster.enabled_hypervisors
4099 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4100 # either the enabled list has changed, or the parameters have, validate
4101 for hv_name, hv_params in self.new_hvparams.items():
4102 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4103 (self.op.enabled_hypervisors and
4104 hv_name in self.op.enabled_hypervisors)):
4105 # either this is a new hypervisor, or its parameters have changed
4106 hv_class = hypervisor.GetHypervisor(hv_name)
4107 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4108 hv_class.CheckParameterSyntax(hv_params)
4109 _CheckHVParams(self, node_list, hv_name, hv_params)
4112 # no need to check any newly-enabled hypervisors, since the
4113 # defaults have already been checked in the above code-block
4114 for os_name, os_hvp in self.new_os_hvp.items():
4115 for hv_name, hv_params in os_hvp.items():
4116 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4117 # we need to fill in the new os_hvp on top of the actual hv_p
4118 cluster_defaults = self.new_hvparams.get(hv_name, {})
4119 new_osp = objects.FillDict(cluster_defaults, hv_params)
4120 hv_class = hypervisor.GetHypervisor(hv_name)
4121 hv_class.CheckParameterSyntax(new_osp)
4122 _CheckHVParams(self, node_list, hv_name, new_osp)
4124 if self.op.default_iallocator:
4125 alloc_script = utils.FindFile(self.op.default_iallocator,
4126 constants.IALLOCATOR_SEARCH_PATH,
4128 if alloc_script is None:
4129 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4130 " specified" % self.op.default_iallocator,
4133 def Exec(self, feedback_fn):
4134 """Change the parameters of the cluster.
4137 if self.op.vg_name is not None:
4138 new_volume = self.op.vg_name
4141 if new_volume != self.cfg.GetVGName():
4142 self.cfg.SetVGName(new_volume)
4144 feedback_fn("Cluster LVM configuration already in desired"
4145 " state, not changing")
4146 if self.op.drbd_helper is not None:
4147 new_helper = self.op.drbd_helper
4150 if new_helper != self.cfg.GetDRBDHelper():
4151 self.cfg.SetDRBDHelper(new_helper)
4153 feedback_fn("Cluster DRBD helper already in desired state,"
4155 if self.op.hvparams:
4156 self.cluster.hvparams = self.new_hvparams
4158 self.cluster.os_hvp = self.new_os_hvp
4159 if self.op.enabled_hypervisors is not None:
4160 self.cluster.hvparams = self.new_hvparams
4161 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4162 if self.op.beparams:
4163 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4164 if self.op.nicparams:
4165 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4167 self.cluster.ipolicy = self.new_ipolicy
4168 if self.op.osparams:
4169 self.cluster.osparams = self.new_osp
4170 if self.op.ndparams:
4171 self.cluster.ndparams = self.new_ndparams
4172 if self.op.diskparams:
4173 self.cluster.diskparams = self.new_diskparams
4174 if self.op.hv_state:
4175 self.cluster.hv_state_static = self.new_hv_state
4176 if self.op.disk_state:
4177 self.cluster.disk_state_static = self.new_disk_state
4179 if self.op.candidate_pool_size is not None:
4180 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4181 # we need to update the pool size here, otherwise the save will fail
4182 _AdjustCandidatePool(self, [])
4184 if self.op.maintain_node_health is not None:
4185 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4186 feedback_fn("Note: CONFD was disabled at build time, node health"
4187 " maintenance is not useful (still enabling it)")
4188 self.cluster.maintain_node_health = self.op.maintain_node_health
4190 if self.op.prealloc_wipe_disks is not None:
4191 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4193 if self.op.add_uids is not None:
4194 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4196 if self.op.remove_uids is not None:
4197 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4199 if self.op.uid_pool is not None:
4200 self.cluster.uid_pool = self.op.uid_pool
4202 if self.op.default_iallocator is not None:
4203 self.cluster.default_iallocator = self.op.default_iallocator
4205 if self.op.reserved_lvs is not None:
4206 self.cluster.reserved_lvs = self.op.reserved_lvs
4208 if self.op.use_external_mip_script is not None:
4209 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4211 def helper_os(aname, mods, desc):
4213 lst = getattr(self.cluster, aname)
4214 for key, val in mods:
4215 if key == constants.DDM_ADD:
4217 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4220 elif key == constants.DDM_REMOVE:
4224 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4226 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4228 if self.op.hidden_os:
4229 helper_os("hidden_os", self.op.hidden_os, "hidden")
4231 if self.op.blacklisted_os:
4232 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4234 if self.op.master_netdev:
4235 master_params = self.cfg.GetMasterNetworkParameters()
4236 ems = self.cfg.GetUseExternalMipScript()
4237 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4238 self.cluster.master_netdev)
4239 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4241 result.Raise("Could not disable the master ip")
4242 feedback_fn("Changing master_netdev from %s to %s" %
4243 (master_params.netdev, self.op.master_netdev))
4244 self.cluster.master_netdev = self.op.master_netdev
4246 if self.op.master_netmask:
4247 master_params = self.cfg.GetMasterNetworkParameters()
4248 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4249 result = self.rpc.call_node_change_master_netmask(master_params.name,
4250 master_params.netmask,
4251 self.op.master_netmask,
4253 master_params.netdev)
4255 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4258 self.cluster.master_netmask = self.op.master_netmask
4260 self.cfg.Update(self.cluster, feedback_fn)
4262 if self.op.master_netdev:
4263 master_params = self.cfg.GetMasterNetworkParameters()
4264 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4265 self.op.master_netdev)
4266 ems = self.cfg.GetUseExternalMipScript()
4267 result = self.rpc.call_node_activate_master_ip(master_params.name,
4270 self.LogWarning("Could not re-enable the master ip on"
4271 " the master, please restart manually: %s",
4275 def _UploadHelper(lu, nodes, fname):
4276 """Helper for uploading a file and showing warnings.
4279 if os.path.exists(fname):
4280 result = lu.rpc.call_upload_file(nodes, fname)
4281 for to_node, to_result in result.items():
4282 msg = to_result.fail_msg
4284 msg = ("Copy of file %s to node %s failed: %s" %
4285 (fname, to_node, msg))
4286 lu.proc.LogWarning(msg)
4289 def _ComputeAncillaryFiles(cluster, redist):
4290 """Compute files external to Ganeti which need to be consistent.
4292 @type redist: boolean
4293 @param redist: Whether to include files which need to be redistributed
4296 # Compute files for all nodes
4298 constants.SSH_KNOWN_HOSTS_FILE,
4299 constants.CONFD_HMAC_KEY,
4300 constants.CLUSTER_DOMAIN_SECRET_FILE,
4301 constants.SPICE_CERT_FILE,
4302 constants.SPICE_CACERT_FILE,
4303 constants.RAPI_USERS_FILE,
4307 files_all.update(constants.ALL_CERT_FILES)
4308 files_all.update(ssconf.SimpleStore().GetFileList())
4310 # we need to ship at least the RAPI certificate
4311 files_all.add(constants.RAPI_CERT_FILE)
4313 if cluster.modify_etc_hosts:
4314 files_all.add(constants.ETC_HOSTS)
4316 if cluster.use_external_mip_script:
4317 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4319   # Files which are optional; these must:
4320 # - be present in one other category as well
4321 # - either exist or not exist on all nodes of that category (mc, vm all)
4323 constants.RAPI_USERS_FILE,
4326 # Files which should only be on master candidates
4330 files_mc.add(constants.CLUSTER_CONF_FILE)
4332 # Files which should only be on VM-capable nodes
4333 files_vm = set(filename
4334 for hv_name in cluster.enabled_hypervisors
4335 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4337 files_opt |= set(filename
4338 for hv_name in cluster.enabled_hypervisors
4339 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4341 # Filenames in each category must be unique
4342 all_files_set = files_all | files_mc | files_vm
4343 assert (len(all_files_set) ==
4344 sum(map(len, [files_all, files_mc, files_vm]))), \
4345 "Found file listed in more than one file list"
4347 # Optional files must be present in one other category
4348 assert all_files_set.issuperset(files_opt), \
4349 "Optional file not in a different required list"
4351 return (files_all, files_opt, files_mc, files_vm)
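# The four sets returned above are, in order: files needed on every node, files that
# may legitimately be absent, files restricted to master candidates, and files
# restricted to VM-capable nodes; _RedistributeAncillaryFiles below consumes this split.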
4354 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4355 """Distribute additional files which are part of the cluster configuration.
4357 ConfigWriter takes care of distributing the config and ssconf files, but
4358 there are more files which should be distributed to all nodes. This function
4359 makes sure those are copied.
4361 @param lu: calling logical unit
4362 @param additional_nodes: list of nodes not in the config to distribute to
4363 @type additional_vm: boolean
4364 @param additional_vm: whether the additional nodes are vm-capable or not
4367 # Gather target nodes
4368 cluster = lu.cfg.GetClusterInfo()
4369 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4371 online_nodes = lu.cfg.GetOnlineNodeList()
4372 online_set = frozenset(online_nodes)
4373 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4375 if additional_nodes is not None:
4376 online_nodes.extend(additional_nodes)
4378 vm_nodes.extend(additional_nodes)
4380 # Never distribute to master node
4381 for nodelist in [online_nodes, vm_nodes]:
4382 if master_info.name in nodelist:
4383 nodelist.remove(master_info.name)
4386 (files_all, _, files_mc, files_vm) = \
4387 _ComputeAncillaryFiles(cluster, True)
4389 # Never re-distribute configuration file from here
4390 assert not (constants.CLUSTER_CONF_FILE in files_all or
4391 constants.CLUSTER_CONF_FILE in files_vm)
4392 assert not files_mc, "Master candidates not handled in this function"
4395 (online_nodes, files_all),
4396 (vm_nodes, files_vm),
4400 for (node_list, files) in filemap:
4402 _UploadHelper(lu, node_list, fname)
4405 class LUClusterRedistConf(NoHooksLU):
4406 """Force the redistribution of cluster configuration.
4408 This is a very simple LU.
4413 def ExpandNames(self):
4414 self.needed_locks = {
4415 locking.LEVEL_NODE: locking.ALL_SET,
4417 self.share_locks[locking.LEVEL_NODE] = 1
4419 def Exec(self, feedback_fn):
4420 """Redistribute the configuration.
4423 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4424 _RedistributeAncillaryFiles(self)
4427 class LUClusterActivateMasterIp(NoHooksLU):
4428 """Activate the master IP on the master node.
4431 def Exec(self, feedback_fn):
4432 """Activate the master IP.
4435 master_params = self.cfg.GetMasterNetworkParameters()
4436 ems = self.cfg.GetUseExternalMipScript()
4437 result = self.rpc.call_node_activate_master_ip(master_params.name,
4439 result.Raise("Could not activate the master IP")
4442 class LUClusterDeactivateMasterIp(NoHooksLU):
4443 """Deactivate the master IP on the master node.
4446 def Exec(self, feedback_fn):
4447 """Deactivate the master IP.
4450 master_params = self.cfg.GetMasterNetworkParameters()
4451 ems = self.cfg.GetUseExternalMipScript()
4452 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4454 result.Raise("Could not deactivate the master IP")
4457 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4458 """Sleep and poll for an instance's disk to sync.
4461   if not instance.disks or (disks is not None and not disks):
4464 disks = _ExpandCheckDisks(instance, disks)
4467 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4469 node = instance.primary_node
4472 lu.cfg.SetDiskID(dev, node)
4474 # TODO: Convert to utils.Retry
4477 degr_retries = 10 # in seconds, as we sleep 1 second each time
4481 cumul_degraded = False
4482 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4483 msg = rstats.fail_msg
4485 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4488 raise errors.RemoteError("Can't contact node %s for mirror data,"
4489 " aborting." % node)
4492 rstats = rstats.payload
4494 for i, mstat in enumerate(rstats):
4496 lu.LogWarning("Can't compute data for node %s/%s",
4497 node, disks[i].iv_name)
4500 cumul_degraded = (cumul_degraded or
4501 (mstat.is_degraded and mstat.sync_percent is None))
4502 if mstat.sync_percent is not None:
4504 if mstat.estimated_time is not None:
4505 rem_time = ("%s remaining (estimated)" %
4506 utils.FormatSeconds(mstat.estimated_time))
4507 max_time = mstat.estimated_time
4509 rem_time = "no time estimate"
4510 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4511 (disks[i].iv_name, mstat.sync_percent, rem_time))
4513 # if we're done but degraded, let's do a few small retries, to
4514 # make sure we see a stable and not transient situation; therefore
4515 # we force restart of the loop
4516 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4517 logging.info("Degraded disks found, %d retries left", degr_retries)
4525 time.sleep(min(60, max_time))
4528 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
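# the caller gets True only when no disk was left in a degraded state on the final check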
4529 return not cumul_degraded
4532 def _BlockdevFind(lu, node, dev, instance):
4533 """Wrapper around call_blockdev_find to annotate diskparams.
4535 @param lu: A reference to the lu object
4536 @param node: The node to call out
4537 @param dev: The device to find
4538 @param instance: The instance object the device belongs to
4539 @returns The result of the rpc call
4542 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4543 return lu.rpc.call_blockdev_find(node, disk)
4546 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4547 """Wrapper around L{_CheckDiskConsistencyInner}.
4550 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4551 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4555 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4557 """Check that mirrors are not degraded.
4559 @attention: The device has to be annotated already.
4561 The ldisk parameter, if True, will change the test from the
4562 is_degraded attribute (which represents overall non-ok status for
4563 the device(s)) to the ldisk (representing the local storage status).
4566 lu.cfg.SetDiskID(dev, node)
4570 if on_primary or dev.AssembleOnSecondary():
4571 rstats = lu.rpc.call_blockdev_find(node, dev)
4572 msg = rstats.fail_msg
4574 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4576 elif not rstats.payload:
4577 lu.LogWarning("Can't find disk on node %s", node)
4581 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4583 result = result and not rstats.payload.is_degraded
4586 for child in dev.children:
4587 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4593 class LUOobCommand(NoHooksLU):
4594 """Logical unit for OOB handling.
4598 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4600 def ExpandNames(self):
4601 """Gather locks we need.
4604 if self.op.node_names:
4605 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4606 lock_names = self.op.node_names
4608 lock_names = locking.ALL_SET
4610 self.needed_locks = {
4611 locking.LEVEL_NODE: lock_names,
4614 def CheckPrereq(self):
4615 """Check prerequisites.
4618 - the node exists in the configuration
4621 Any errors are signaled by raising errors.OpPrereqError.
4625 self.master_node = self.cfg.GetMasterNode()
4627 assert self.op.power_delay >= 0.0
4629 if self.op.node_names:
4630 if (self.op.command in self._SKIP_MASTER and
4631 self.master_node in self.op.node_names):
4632 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4633 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4635 if master_oob_handler:
4636 additional_text = ("run '%s %s %s' if you want to operate on the"
4637 " master regardless") % (master_oob_handler,
4641 additional_text = "it does not support out-of-band operations"
4643 raise errors.OpPrereqError(("Operating on the master node %s is not"
4644 " allowed for %s; %s") %
4645 (self.master_node, self.op.command,
4646 additional_text), errors.ECODE_INVAL)
4648 self.op.node_names = self.cfg.GetNodeList()
4649 if self.op.command in self._SKIP_MASTER:
4650 self.op.node_names.remove(self.master_node)
4652 if self.op.command in self._SKIP_MASTER:
4653 assert self.master_node not in self.op.node_names
4655 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4657 raise errors.OpPrereqError("Node %s not found" % node_name,
4660 self.nodes.append(node)
4662 if (not self.op.ignore_status and
4663 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4664 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4665 " not marked offline") % node_name,
4668 def Exec(self, feedback_fn):
4669 """Execute OOB and return result if we expect any.
4672 master_node = self.master_node
4675 for idx, node in enumerate(utils.NiceSort(self.nodes,
4676 key=lambda node: node.name)):
4677 node_entry = [(constants.RS_NORMAL, node.name)]
4678 ret.append(node_entry)
4680 oob_program = _SupportsOob(self.cfg, node)
4683 node_entry.append((constants.RS_UNAVAIL, None))
4686 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4687 self.op.command, oob_program, node.name)
4688 result = self.rpc.call_run_oob(master_node, oob_program,
4689 self.op.command, node.name,
4693 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4694 node.name, result.fail_msg)
4695 node_entry.append((constants.RS_NODATA, None))
4698 self._CheckPayload(result)
4699 except errors.OpExecError, err:
4700 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4702 node_entry.append((constants.RS_NODATA, None))
4704 if self.op.command == constants.OOB_HEALTH:
4705 # For health we should log important events
4706 for item, status in result.payload:
4707 if status in [constants.OOB_STATUS_WARNING,
4708 constants.OOB_STATUS_CRITICAL]:
4709 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4710 item, node.name, status)
4712 if self.op.command == constants.OOB_POWER_ON:
4714 elif self.op.command == constants.OOB_POWER_OFF:
4715 node.powered = False
4716 elif self.op.command == constants.OOB_POWER_STATUS:
4717 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4718 if powered != node.powered:
4719 logging.warning(("Recorded power state (%s) of node '%s' does not"
4720 " match actual power state (%s)"), node.powered,
4723 # For configuration changing commands we should update the node
4724 if self.op.command in (constants.OOB_POWER_ON,
4725 constants.OOB_POWER_OFF):
4726 self.cfg.Update(node, feedback_fn)
4728 node_entry.append((constants.RS_NORMAL, result.payload))
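# Power-on commands are staggered: wait op.power_delay seconds between consecutive
# nodes (the wait is skipped after the last node in the list)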
4730 if (self.op.command == constants.OOB_POWER_ON and
4731 idx < len(self.nodes) - 1):
4732 time.sleep(self.op.power_delay)
4736 def _CheckPayload(self, result):
4737 """Checks if the payload is valid.
4739 @param result: RPC result
4740 @raises errors.OpExecError: If payload is not valid
4744 if self.op.command == constants.OOB_HEALTH:
4745 if not isinstance(result.payload, list):
4746 errs.append("command 'health' is expected to return a list but got %s" %
4747 type(result.payload))
4749 for item, status in result.payload:
4750 if status not in constants.OOB_STATUSES:
4751 errs.append("health item '%s' has invalid status '%s'" %
4754 if self.op.command == constants.OOB_POWER_STATUS:
4755 if not isinstance(result.payload, dict):
4756 errs.append("power-status is expected to return a dict but got %s" %
4757 type(result.payload))
4759 if self.op.command in [
4760 constants.OOB_POWER_ON,
4761 constants.OOB_POWER_OFF,
4762 constants.OOB_POWER_CYCLE,
4764 if result.payload is not None:
4765 errs.append("%s is expected to not return payload but got '%s'" %
4766 (self.op.command, result.payload))
4769 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4770 utils.CommaJoin(errs))
4773 class _OsQuery(_QueryBase):
4774 FIELDS = query.OS_FIELDS
4776 def ExpandNames(self, lu):
4777 # Lock all nodes in shared mode
4778 # Temporary removal of locks, should be reverted later
4779 # TODO: reintroduce locks when they are lighter-weight
4780 lu.needed_locks = {}
4781 #self.share_locks[locking.LEVEL_NODE] = 1
4782 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4784 # The following variables interact with _QueryBase._GetNames
4786 self.wanted = self.names
4788 self.wanted = locking.ALL_SET
4790 self.do_locking = self.use_locking
4792 def DeclareLocks(self, lu, level):
4796 def _DiagnoseByOS(rlist):
4797 """Remaps a per-node return list into an a per-os per-node dictionary
4799 @param rlist: a map with node names as keys and OS objects as values
4802 @return: a dictionary with osnames as keys and as value another
4803 map, with nodes as keys and tuples of (path, status, diagnose,
4804 variants, parameters, api_versions) as values, eg::
4806 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4807 (/srv/..., False, "invalid api")],
4808 "node2": [(/srv/..., True, "", [], [])]}
4813 # we build here the list of nodes that didn't fail the RPC (at RPC
4814 # level), so that nodes with a non-responding node daemon don't
4815 # make all OSes invalid
4816 good_nodes = [node_name for node_name in rlist
4817 if not rlist[node_name].fail_msg]
4818 for node_name, nr in rlist.items():
4819 if nr.fail_msg or not nr.payload:
4821 for (name, path, status, diagnose, variants,
4822 params, api_versions) in nr.payload:
4823 if name not in all_os:
4824 # build a list of nodes for this os containing empty lists
4825 # for each node in node_list
4827 for nname in good_nodes:
4828 all_os[name][nname] = []
4829 # convert params from [name, help] to (name, help)
4830 params = [tuple(v) for v in params]
4831 all_os[name][node_name].append((path, status, diagnose,
4832 variants, params, api_versions))
4835 def _GetQueryData(self, lu):
4836 """Computes the list of nodes and their attributes.
4839 # Locking is not used
4840 assert not (compat.any(lu.glm.is_owned(level)
4841 for level in locking.LEVELS
4842 if level != locking.LEVEL_CLUSTER) or
4843 self.do_locking or self.use_locking)
4845 valid_nodes = [node.name
4846 for node in lu.cfg.GetAllNodesInfo().values()
4847 if not node.offline and node.vm_capable]
4848 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4849 cluster = lu.cfg.GetClusterInfo()
4853 for (os_name, os_data) in pol.items():
4854 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4855 hidden=(os_name in cluster.hidden_os),
4856 blacklisted=(os_name in cluster.blacklisted_os))
4860 api_versions = set()
4862 for idx, osl in enumerate(os_data.values()):
4863 info.valid = bool(info.valid and osl and osl[0][1])
4867 (node_variants, node_params, node_api) = osl[0][3:6]
4870 variants.update(node_variants)
4871 parameters.update(node_params)
4872 api_versions.update(node_api)
4874 # Filter out inconsistent values
4875 variants.intersection_update(node_variants)
4876 parameters.intersection_update(node_params)
4877 api_versions.intersection_update(node_api)
4879 info.variants = list(variants)
4880 info.parameters = list(parameters)
4881 info.api_versions = list(api_versions)
4883 data[os_name] = info
4885 # Prepare data in requested order
4886 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4890 class LUOsDiagnose(NoHooksLU):
4891 """Logical unit for OS diagnose/query.
4897 def _BuildFilter(fields, names):
4898 """Builds a filter for querying OSes.
4901 name_filter = qlang.MakeSimpleFilter("name", names)
4903 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4904 # respective field is not requested
4905 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4906 for fname in ["hidden", "blacklisted"]
4907 if fname not in fields]
4908 if "valid" not in fields:
4909 status_filter.append([qlang.OP_TRUE, "valid"])
4912 status_filter.insert(0, qlang.OP_AND)
4914 status_filter = None
4916 if name_filter and status_filter:
4917 return [qlang.OP_AND, name_filter, status_filter]
4921 return status_filter
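# Illustrative result (assuming the usual qlang constants): for fields=["name", "variants"]
# and names=["debian-edgy"], the filter built above is roughly
#   [OP_AND, [OP_OR, [OP_EQUAL, "name", "debian-edgy"]],
#            [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
#                     [OP_NOT, [OP_TRUE, "blacklisted"]],
#                     [OP_TRUE, "valid"]]]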
4923 def CheckArguments(self):
4924 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4925 self.op.output_fields, False)
4927 def ExpandNames(self):
4928 self.oq.ExpandNames(self)
4930 def Exec(self, feedback_fn):
4931 return self.oq.OldStyleQuery(self)
4934 class LUNodeRemove(LogicalUnit):
4935 """Logical unit for removing a node.
4938 HPATH = "node-remove"
4939 HTYPE = constants.HTYPE_NODE
4941 def BuildHooksEnv(self):
4946 "OP_TARGET": self.op.node_name,
4947 "NODE_NAME": self.op.node_name,
4950 def BuildHooksNodes(self):
4951 """Build hooks nodes.
4953 This doesn't run on the target node in the pre phase as a failed
4954 node would then be impossible to remove.
4957 all_nodes = self.cfg.GetNodeList()
4959 all_nodes.remove(self.op.node_name)
4962 return (all_nodes, all_nodes)
4964 def CheckPrereq(self):
4965 """Check prerequisites.
4968 - the node exists in the configuration
4969 - it does not have primary or secondary instances
4970 - it's not the master
4972 Any errors are signaled by raising errors.OpPrereqError.
4975 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4976 node = self.cfg.GetNodeInfo(self.op.node_name)
4977 assert node is not None
4979 masternode = self.cfg.GetMasterNode()
4980 if node.name == masternode:
4981 raise errors.OpPrereqError("Node is the master node, failover to another"
4982 " node is required", errors.ECODE_INVAL)
4984 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4985 if node.name in instance.all_nodes:
4986 raise errors.OpPrereqError("Instance %s is still running on the node,"
4987 " please remove first" % instance_name,
4989 self.op.node_name = node.name
4992 def Exec(self, feedback_fn):
4993 """Removes the node from the cluster.
4997 logging.info("Stopping the node daemon and removing configs from node %s",
5000 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5002 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5005 # Promote nodes to master candidate as needed
5006 _AdjustCandidatePool(self, exceptions=[node.name])
5007 self.context.RemoveNode(node.name)
5009 # Run post hooks on the node before it's removed
5010 _RunPostHook(self, node.name)
5012 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5013 msg = result.fail_msg
5015 self.LogWarning("Errors encountered on the remote node while leaving"
5016 " the cluster: %s", msg)
5018 # Remove node from our /etc/hosts
5019 if self.cfg.GetClusterInfo().modify_etc_hosts:
5020 master_node = self.cfg.GetMasterNode()
5021 result = self.rpc.call_etc_hosts_modify(master_node,
5022 constants.ETC_HOSTS_REMOVE,
5024 result.Raise("Can't update hosts file with new host data")
5025 _RedistributeAncillaryFiles(self)
5028 class _NodeQuery(_QueryBase):
5029 FIELDS = query.NODE_FIELDS
5031 def ExpandNames(self, lu):
5032 lu.needed_locks = {}
5033 lu.share_locks = _ShareAll()
5036 self.wanted = _GetWantedNodes(lu, self.names)
5038 self.wanted = locking.ALL_SET
5040 self.do_locking = (self.use_locking and
5041 query.NQ_LIVE in self.requested_data)
5044 # If any non-static field is requested we need to lock the nodes
5045 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5047 def DeclareLocks(self, lu, level):
5050 def _GetQueryData(self, lu):
5051 """Computes the list of nodes and their attributes.
5054 all_info = lu.cfg.GetAllNodesInfo()
5056 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5058 # Gather data as requested
5059 if query.NQ_LIVE in self.requested_data:
5060 # filter out non-vm_capable nodes
5061 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5063 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5064 [lu.cfg.GetHypervisorType()])
5065 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5066 for (name, nresult) in node_data.items()
5067 if not nresult.fail_msg and nresult.payload)
5071 if query.NQ_INST in self.requested_data:
5072 node_to_primary = dict([(name, set()) for name in nodenames])
5073 node_to_secondary = dict([(name, set()) for name in nodenames])
5075 inst_data = lu.cfg.GetAllInstancesInfo()
5077 for inst in inst_data.values():
5078 if inst.primary_node in node_to_primary:
5079 node_to_primary[inst.primary_node].add(inst.name)
5080 for secnode in inst.secondary_nodes:
5081 if secnode in node_to_secondary:
5082 node_to_secondary[secnode].add(inst.name)
5084 node_to_primary = None
5085 node_to_secondary = None
5087 if query.NQ_OOB in self.requested_data:
5088 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5089 for name, node in all_info.iteritems())
5093 if query.NQ_GROUP in self.requested_data:
5094 groups = lu.cfg.GetAllNodeGroupsInfo()
5098 return query.NodeQueryData([all_info[name] for name in nodenames],
5099 live_data, lu.cfg.GetMasterNode(),
5100 node_to_primary, node_to_secondary, groups,
5101 oob_support, lu.cfg.GetClusterInfo())
5104 class LUNodeQuery(NoHooksLU):
5105 """Logical unit for querying nodes.
5108 # pylint: disable=W0142
5111 def CheckArguments(self):
5112 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5113 self.op.output_fields, self.op.use_locking)
5115 def ExpandNames(self):
5116 self.nq.ExpandNames(self)
5118 def DeclareLocks(self, level):
5119 self.nq.DeclareLocks(self, level)
5121 def Exec(self, feedback_fn):
5122 return self.nq.OldStyleQuery(self)
5125 class LUNodeQueryvols(NoHooksLU):
5126 """Logical unit for getting volumes on node(s).
5130 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5131 _FIELDS_STATIC = utils.FieldSet("node")
5133 def CheckArguments(self):
5134 _CheckOutputFields(static=self._FIELDS_STATIC,
5135 dynamic=self._FIELDS_DYNAMIC,
5136 selected=self.op.output_fields)
5138 def ExpandNames(self):
5139 self.share_locks = _ShareAll()
5140 self.needed_locks = {}
5142 if not self.op.nodes:
5143 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5145 self.needed_locks[locking.LEVEL_NODE] = \
5146 _GetWantedNodes(self, self.op.nodes)
5148 def Exec(self, feedback_fn):
5149 """Computes the list of nodes and their attributes.
5152 nodenames = self.owned_locks(locking.LEVEL_NODE)
5153 volumes = self.rpc.call_node_volumes(nodenames)
5155 ilist = self.cfg.GetAllInstancesInfo()
5156 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5159 for node in nodenames:
5160 nresult = volumes[node]
5163 msg = nresult.fail_msg
5165 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5168 node_vols = sorted(nresult.payload,
5169 key=operator.itemgetter("dev"))
5171 for vol in node_vols:
5173 for field in self.op.output_fields:
5176 elif field == "phys":
5180 elif field == "name":
5182 elif field == "size":
5183 val = int(float(vol["size"]))
5184 elif field == "instance":
5185 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5187 raise errors.ParameterError(field)
5188 node_output.append(str(val))
5190 output.append(node_output)
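# Each row in 'output' holds the requested fields rendered as strings, e.g. (hypothetical)
# ["node1.example.com", "/dev/sdb1", "xenvg", "disk0", "10240", "inst1"] when asking for
# node, phys, vg, name, size and instance.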
5195 class LUNodeQueryStorage(NoHooksLU):
5196 """Logical unit for getting information on storage units on node(s).
5199 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5202 def CheckArguments(self):
5203 _CheckOutputFields(static=self._FIELDS_STATIC,
5204 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5205 selected=self.op.output_fields)
5207 def ExpandNames(self):
5208 self.share_locks = _ShareAll()
5209 self.needed_locks = {}
5212 self.needed_locks[locking.LEVEL_NODE] = \
5213 _GetWantedNodes(self, self.op.nodes)
5215 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5217 def Exec(self, feedback_fn):
5218 """Computes the list of nodes and their attributes.
5221 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5223 # Always get name to sort by
5224 if constants.SF_NAME in self.op.output_fields:
5225 fields = self.op.output_fields[:]
5227 fields = [constants.SF_NAME] + self.op.output_fields
5229 # Never ask for node or type as it's only known to the LU
5230 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5231 while extra in fields:
5232 fields.remove(extra)
5234 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5235 name_idx = field_idx[constants.SF_NAME]
5237 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5238 data = self.rpc.call_storage_list(self.nodes,
5239 self.op.storage_type, st_args,
5240 self.op.name, fields)
5244 for node in utils.NiceSort(self.nodes):
5245 nresult = data[node]
5249 msg = nresult.fail_msg
5251 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5254 rows = dict([(row[name_idx], row) for row in nresult.payload])
5256 for name in utils.NiceSort(rows.keys()):
5261 for field in self.op.output_fields:
5262 if field == constants.SF_NODE:
5264 elif field == constants.SF_TYPE:
5265 val = self.op.storage_type
5266 elif field in field_idx:
5267 val = row[field_idx[field]]
5269 raise errors.ParameterError(field)
5278 class _InstanceQuery(_QueryBase):
5279 FIELDS = query.INSTANCE_FIELDS
5281 def ExpandNames(self, lu):
5282 lu.needed_locks = {}
5283 lu.share_locks = _ShareAll()
5286 self.wanted = _GetWantedInstances(lu, self.names)
5288 self.wanted = locking.ALL_SET
5290 self.do_locking = (self.use_locking and
5291 query.IQ_LIVE in self.requested_data)
5293 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5294 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5295 lu.needed_locks[locking.LEVEL_NODE] = []
5296 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5298 self.do_grouplocks = (self.do_locking and
5299 query.IQ_NODES in self.requested_data)
5301 def DeclareLocks(self, lu, level):
5303 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5304 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5306 # Lock all groups used by instances optimistically; this requires going
5307 # via the node before it's locked, requiring verification later on
5308 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5310 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5311 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5312 elif level == locking.LEVEL_NODE:
5313 lu._LockInstancesNodes() # pylint: disable=W0212
5316 def _CheckGroupLocks(lu):
5317 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5318 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5320 # Check if node groups for locked instances are still correct
5321 for instance_name in owned_instances:
5322 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5324 def _GetQueryData(self, lu):
5325 """Computes the list of instances and their attributes.
5328 if self.do_grouplocks:
5329 self._CheckGroupLocks(lu)
5331 cluster = lu.cfg.GetClusterInfo()
5332 all_info = lu.cfg.GetAllInstancesInfo()
5334 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5336 instance_list = [all_info[name] for name in instance_names]
5337 nodes = frozenset(itertools.chain(*(inst.all_nodes
5338 for inst in instance_list)))
5339 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5342 wrongnode_inst = set()
5344 # Gather data as requested
5345 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5347 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5349 result = node_data[name]
5351 # offline nodes will be in both lists
5352 assert result.fail_msg
5353 offline_nodes.append(name)
5355 bad_nodes.append(name)
5356 elif result.payload:
5357 for inst in result.payload:
5358 if inst in all_info:
5359 if all_info[inst].primary_node == name:
5360 live_data.update(result.payload)
5362 wrongnode_inst.add(inst)
5364 # orphan instance; we don't list it here as we don't
5365 # handle this case yet in the output of instance listing
5366 logging.warning("Orphan instance '%s' found on node %s",
5368 # else no instance is alive
5372 if query.IQ_DISKUSAGE in self.requested_data:
5373 disk_usage = dict((inst.name,
5374 _ComputeDiskSize(inst.disk_template,
5375 [{constants.IDISK_SIZE: disk.size}
5376 for disk in inst.disks]))
5377 for inst in instance_list)
5381 if query.IQ_CONSOLE in self.requested_data:
5383 for inst in instance_list:
5384 if inst.name in live_data:
5385 # Instance is running
5386 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5388 consinfo[inst.name] = None
5389 assert set(consinfo.keys()) == set(instance_names)
5393 if query.IQ_NODES in self.requested_data:
5394 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5396 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5397 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5398 for uuid in set(map(operator.attrgetter("group"),
5404 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5405 disk_usage, offline_nodes, bad_nodes,
5406 live_data, wrongnode_inst, consinfo,
5410 class LUQuery(NoHooksLU):
5411 """Query for resources/items of a certain kind.
5414 # pylint: disable=W0142
5417 def CheckArguments(self):
5418 qcls = _GetQueryImplementation(self.op.what)
5420 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5422 def ExpandNames(self):
5423 self.impl.ExpandNames(self)
5425 def DeclareLocks(self, level):
5426 self.impl.DeclareLocks(self, level)
5428 def Exec(self, feedback_fn):
5429 return self.impl.NewStyleQuery(self)
5432 class LUQueryFields(NoHooksLU):
5433 """Query for resources/items of a certain kind.
5436 # pylint: disable=W0142
5439 def CheckArguments(self):
5440 self.qcls = _GetQueryImplementation(self.op.what)
5442 def ExpandNames(self):
5443 self.needed_locks = {}
5445 def Exec(self, feedback_fn):
5446 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5449 class LUNodeModifyStorage(NoHooksLU):
5450 """Logical unit for modifying a storage volume on a node.
5455 def CheckArguments(self):
5456 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5458 storage_type = self.op.storage_type
5461 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5463 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5464 " modified" % storage_type,
5467 diff = set(self.op.changes.keys()) - modifiable
5469 raise errors.OpPrereqError("The following fields can not be modified for"
5470 " storage units of type '%s': %r" %
5471 (storage_type, list(diff)),
5474 def ExpandNames(self):
5475 self.needed_locks = {
5476 locking.LEVEL_NODE: self.op.node_name,
5479 def Exec(self, feedback_fn):
5480 """Computes the list of nodes and their attributes.
5483 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5484 result = self.rpc.call_storage_modify(self.op.node_name,
5485 self.op.storage_type, st_args,
5486 self.op.name, self.op.changes)
5487 result.Raise("Failed to modify storage unit '%s' on %s" %
5488 (self.op.name, self.op.node_name))
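# Illustrative use: for an "lvm-pv" storage unit, self.op.changes could be
# {constants.SF_ALLOCATABLE: False} to mark the physical volume as unallocatable;
# anything outside constants.MODIFIABLE_STORAGE_FIELDS was already rejected in
# CheckArguments.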
5491 class LUNodeAdd(LogicalUnit):
5492 """Logical unit for adding node to the cluster.
5496 HTYPE = constants.HTYPE_NODE
5497 _NFLAGS = ["master_capable", "vm_capable"]
5499 def CheckArguments(self):
5500 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5501 # validate/normalize the node name
5502 self.hostname = netutils.GetHostname(name=self.op.node_name,
5503 family=self.primary_ip_family)
5504 self.op.node_name = self.hostname.name
5506 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5507 raise errors.OpPrereqError("Cannot readd the master node",
5510 if self.op.readd and self.op.group:
5511 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5512 " being readded", errors.ECODE_INVAL)
5514 def BuildHooksEnv(self):
5517 This will run on all nodes before, and on all nodes + the new node after.
5521 "OP_TARGET": self.op.node_name,
5522 "NODE_NAME": self.op.node_name,
5523 "NODE_PIP": self.op.primary_ip,
5524 "NODE_SIP": self.op.secondary_ip,
5525 "MASTER_CAPABLE": str(self.op.master_capable),
5526 "VM_CAPABLE": str(self.op.vm_capable),
5529 def BuildHooksNodes(self):
5530 """Build hooks nodes.
5533 # Exclude added node
5534 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5535 post_nodes = pre_nodes + [self.op.node_name, ]
5537 return (pre_nodes, post_nodes)
5539 def CheckPrereq(self):
5540 """Check prerequisites.
5543 - the new node is not already in the config
5545 - its parameters (single/dual homed) matches the cluster
5547 Any errors are signaled by raising errors.OpPrereqError.
5551 hostname = self.hostname
5552 node = hostname.name
5553 primary_ip = self.op.primary_ip = hostname.ip
5554 if self.op.secondary_ip is None:
5555 if self.primary_ip_family == netutils.IP6Address.family:
5556         raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5557 " IPv4 address must be given as secondary",
5559 self.op.secondary_ip = primary_ip
5561 secondary_ip = self.op.secondary_ip
5562 if not netutils.IP4Address.IsValid(secondary_ip):
5563 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5564 " address" % secondary_ip, errors.ECODE_INVAL)
5566 node_list = cfg.GetNodeList()
5567 if not self.op.readd and node in node_list:
5568 raise errors.OpPrereqError("Node %s is already in the configuration" %
5569 node, errors.ECODE_EXISTS)
5570 elif self.op.readd and node not in node_list:
5571 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5574 self.changed_primary_ip = False
5576 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5577 if self.op.readd and node == existing_node_name:
5578 if existing_node.secondary_ip != secondary_ip:
5579 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5580 " address configuration as before",
5582 if existing_node.primary_ip != primary_ip:
5583 self.changed_primary_ip = True
5587 if (existing_node.primary_ip == primary_ip or
5588 existing_node.secondary_ip == primary_ip or
5589 existing_node.primary_ip == secondary_ip or
5590 existing_node.secondary_ip == secondary_ip):
5591 raise errors.OpPrereqError("New node ip address(es) conflict with"
5592 " existing node %s" % existing_node.name,
5593 errors.ECODE_NOTUNIQUE)
5595 # After this 'if' block, None is no longer a valid value for the
5596 # _capable op attributes
5598 old_node = self.cfg.GetNodeInfo(node)
5599 assert old_node is not None, "Can't retrieve locked node %s" % node
5600 for attr in self._NFLAGS:
5601 if getattr(self.op, attr) is None:
5602 setattr(self.op, attr, getattr(old_node, attr))
5604 for attr in self._NFLAGS:
5605 if getattr(self.op, attr) is None:
5606 setattr(self.op, attr, True)
5608 if self.op.readd and not self.op.vm_capable:
5609 pri, sec = cfg.GetNodeInstances(node)
5611 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5612 " flag set to false, but it already holds"
5613 " instances" % node,
5616 # check that the type of the node (single versus dual homed) is the
5617 # same as for the master
5618 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5619 master_singlehomed = myself.secondary_ip == myself.primary_ip
5620 newbie_singlehomed = secondary_ip == primary_ip
5621 if master_singlehomed != newbie_singlehomed:
5622 if master_singlehomed:
5623 raise errors.OpPrereqError("The master has no secondary ip but the"
5624 " new node has one",
5627 raise errors.OpPrereqError("The master has a secondary ip but the"
5628 " new node doesn't have one",
5631 # checks reachability
5632 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5633 raise errors.OpPrereqError("Node not reachable by ping",
5634 errors.ECODE_ENVIRON)
5636 if not newbie_singlehomed:
5637 # check reachability from my secondary ip to newbie's secondary ip
5638 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5639 source=myself.secondary_ip):
5640 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5641 " based ping to node daemon port",
5642 errors.ECODE_ENVIRON)
5649 if self.op.master_capable:
5650 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5652 self.master_candidate = False
5655 self.new_node = old_node
5657 node_group = cfg.LookupNodeGroup(self.op.group)
5658 self.new_node = objects.Node(name=node,
5659 primary_ip=primary_ip,
5660 secondary_ip=secondary_ip,
5661 master_candidate=self.master_candidate,
5662 offline=False, drained=False,
5665 if self.op.ndparams:
5666 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5668 if self.op.hv_state:
5669 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5671 if self.op.disk_state:
5672 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5674 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5675 # it a property on the base class.
5676 result = rpc.DnsOnlyRunner().call_version([node])[node]
5677 result.Raise("Can't get version information from node %s" % node)
5678 if constants.PROTOCOL_VERSION == result.payload:
5679 logging.info("Communication to node %s fine, sw version %s match",
5680 node, result.payload)
5681 else:
5682 raise errors.OpPrereqError("Version mismatch master version %s,"
5683 " node version %s" %
5684 (constants.PROTOCOL_VERSION, result.payload),
5685 errors.ECODE_ENVIRON)
5687 def Exec(self, feedback_fn):
5688 """Adds the new node to the cluster.
5691 new_node = self.new_node
5692 node = new_node.name
5694 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5697 # We are adding a new node, so we assume it's powered
5698 new_node.powered = True
5700 # for re-adds, reset the offline/drained/master-candidate flags;
5701 # we need to reset here, otherwise offline would prevent RPC calls
5702 # later in the procedure; this also means that if the re-add
5703 # fails, we are left with a non-offlined, broken node
5704 if self.op.readd:
5705 new_node.drained = new_node.offline = False # pylint: disable=W0201
5706 self.LogInfo("Readding a node, the offline/drained flags were reset")
5707 # if we demote the node, we do cleanup later in the procedure
5708 new_node.master_candidate = self.master_candidate
5709 if self.changed_primary_ip:
5710 new_node.primary_ip = self.op.primary_ip
5712 # copy the master/vm_capable flags
5713 for attr in self._NFLAGS:
5714 setattr(new_node, attr, getattr(self.op, attr))
5716 # notify the user about any possible mc promotion
5717 if new_node.master_candidate:
5718 self.LogInfo("Node will be a master candidate")
5720 if self.op.ndparams:
5721 new_node.ndparams = self.op.ndparams
5723 new_node.ndparams = {}
5725 if self.op.hv_state:
5726 new_node.hv_state_static = self.new_hv_state
5728 if self.op.disk_state:
5729 new_node.disk_state_static = self.new_disk_state
5731 # Add node to our /etc/hosts, and add key to known_hosts
5732 if self.cfg.GetClusterInfo().modify_etc_hosts:
5733 master_node = self.cfg.GetMasterNode()
5734 result = self.rpc.call_etc_hosts_modify(master_node,
5735 constants.ETC_HOSTS_ADD,
5738 result.Raise("Can't update hosts file with new host data")
5740 if new_node.secondary_ip != new_node.primary_ip:
5741 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5742 False)
5744 node_verify_list = [self.cfg.GetMasterNode()]
5745 node_verify_param = {
5746 constants.NV_NODELIST: ([node], {}),
5747 # TODO: do a node-net-test as well?
5750 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5751 self.cfg.GetClusterName())
5752 for verifier in node_verify_list:
5753 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5754 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5755 if nl_payload:
5756 for failed in nl_payload:
5757 feedback_fn("ssh/hostname verification failed"
5758 " (checking from %s): %s" %
5759 (verifier, nl_payload[failed]))
5760 raise errors.OpExecError("ssh/hostname verification failed")
5762 if self.op.readd:
5763 _RedistributeAncillaryFiles(self)
5764 self.context.ReaddNode(new_node)
5765 # make sure we redistribute the config
5766 self.cfg.Update(new_node, feedback_fn)
5767 # and make sure the new node will not have old files around
5768 if not new_node.master_candidate:
5769 result = self.rpc.call_node_demote_from_mc(new_node.name)
5770 msg = result.fail_msg
5771 if msg:
5772 self.LogWarning("Node failed to demote itself from master"
5773 " candidate status: %s" % msg)
5774 else:
5775 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5776 additional_vm=self.op.vm_capable)
5777 self.context.AddNode(new_node, self.proc.GetECId())
5780 class LUNodeSetParams(LogicalUnit):
5781 """Modifies the parameters of a node.
5783 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5784 to the node role (as _ROLE_*)
5785 @cvar _R2F: a dictionary from node role to tuples of flags
5786 @cvar _FLAGS: a list of attribute names corresponding to the flags
5789 HPATH = "node-modify"
5790 HTYPE = constants.HTYPE_NODE
5792 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5794 (True, False, False): _ROLE_CANDIDATE,
5795 (False, True, False): _ROLE_DRAINED,
5796 (False, False, True): _ROLE_OFFLINE,
5797 (False, False, False): _ROLE_REGULAR,
5799 _R2F = dict((v, k) for k, v in _F2R.items())
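# Illustration: the flag tuples above follow the (master_candidate,
# drained, offline) order of _FLAGS below, so _F2R[(True, False, False)]
# is _ROLE_CANDIDATE and _R2F[_ROLE_REGULAR] is (False, False, False).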
5800 _FLAGS = ["master_candidate", "drained", "offline"]
5802 def CheckArguments(self):
5803 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5804 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5805 self.op.master_capable, self.op.vm_capable,
5806 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5807 self.op.disk_state]
5808 if all_mods.count(None) == len(all_mods):
5809 raise errors.OpPrereqError("Please pass at least one modification",
5811 if all_mods.count(True) > 1:
5812 raise errors.OpPrereqError("Can't set the node into more than one"
5813 " state at the same time",
5816 # Boolean value that tells us whether we might be demoting from MC
5817 self.might_demote = (self.op.master_candidate == False or
5818 self.op.offline == True or
5819 self.op.drained == True or
5820 self.op.master_capable == False)
5822 if self.op.secondary_ip:
5823 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5824 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5825 " address" % self.op.secondary_ip,
5828 self.lock_all = self.op.auto_promote and self.might_demote
5829 self.lock_instances = self.op.secondary_ip is not None
5831 def _InstanceFilter(self, instance):
5832 """Filter for getting affected instances.
5835 return (instance.disk_template in constants.DTS_INT_MIRROR and
5836 self.op.node_name in instance.all_nodes)
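# In other words, only instances using an internally mirrored disk
# template (e.g. DRBD) that have this node among their nodes are
# considered affected by a secondary IP change.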
5838 def ExpandNames(self):
5839 if self.lock_all:
5840 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5841 else:
5842 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5844 # Since modifying a node can have severe effects on currently running
5845 # operations the resource lock is at least acquired in shared mode
5846 self.needed_locks[locking.LEVEL_NODE_RES] = \
5847 self.needed_locks[locking.LEVEL_NODE]
5849 # Get node resource and instance locks in shared mode; they are not used
5850 # for anything but read-only access
5851 self.share_locks[locking.LEVEL_NODE_RES] = 1
5852 self.share_locks[locking.LEVEL_INSTANCE] = 1
5854 if self.lock_instances:
5855 self.needed_locks[locking.LEVEL_INSTANCE] = \
5856 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5858 def BuildHooksEnv(self):
5861 This runs on the master node.
5865 "OP_TARGET": self.op.node_name,
5866 "MASTER_CANDIDATE": str(self.op.master_candidate),
5867 "OFFLINE": str(self.op.offline),
5868 "DRAINED": str(self.op.drained),
5869 "MASTER_CAPABLE": str(self.op.master_capable),
5870 "VM_CAPABLE": str(self.op.vm_capable),
5873 def BuildHooksNodes(self):
5874 """Build hooks nodes.
5877 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5880 def CheckPrereq(self):
5881 """Check prerequisites.
5883 This only checks the instance list against the existing names.
5886 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5888 if self.lock_instances:
5889 affected_instances = \
5890 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5892 # Verify instance locks
5893 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5894 wanted_instances = frozenset(affected_instances.keys())
5895 if wanted_instances - owned_instances:
5896 raise errors.OpPrereqError("Instances affected by changing node %s's"
5897 " secondary IP address have changed since"
5898 " locks were acquired, wanted '%s', have"
5899 " '%s'; retry the operation" %
5901 utils.CommaJoin(wanted_instances),
5902 utils.CommaJoin(owned_instances)),
5904 else:
5905 affected_instances = None
5907 if (self.op.master_candidate is not None or
5908 self.op.drained is not None or
5909 self.op.offline is not None):
5910 # we can't change the master's node flags
5911 if self.op.node_name == self.cfg.GetMasterNode():
5912 raise errors.OpPrereqError("The master role can be changed"
5913 " only via master-failover",
5916 if self.op.master_candidate and not node.master_capable:
5917 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5918 " it a master candidate" % node.name,
5921 if self.op.vm_capable == False:
5922 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5924 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5925 " the vm_capable flag" % node.name,
5928 if node.master_candidate and self.might_demote and not self.lock_all:
5929 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5930 # check if after removing the current node, we're missing master
5932 (mc_remaining, mc_should, _) = \
5933 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5934 if mc_remaining < mc_should:
5935 raise errors.OpPrereqError("Not enough master candidates, please"
5936 " pass auto promote option to allow"
5937 " promotion (--auto-promote or RAPI"
5938 " auto_promote=True)", errors.ECODE_STATE)
5940 self.old_flags = old_flags = (node.master_candidate,
5941 node.drained, node.offline)
5942 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5943 self.old_role = old_role = self._F2R[old_flags]
5945 # Check for ineffective changes
5946 for attr in self._FLAGS:
5947 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5948 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5949 setattr(self.op, attr, None)
5951 # Past this point, any flag change to False means a transition
5952 # away from the respective state, as only real changes are kept
5954 # TODO: We might query the real power state if it supports OOB
5955 if _SupportsOob(self.cfg, node):
5956 if self.op.offline is False and not (node.powered or
5957 self.op.powered == True):
5958 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5959 " offline status can be reset") %
5961 elif self.op.powered is not None:
5962 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5963 " as it does not support out-of-band"
5964 " handling") % self.op.node_name)
5966 # If we're being deofflined/drained, we'll MC ourself if needed
5967 if (self.op.drained == False or self.op.offline == False or
5968 (self.op.master_capable and not node.master_capable)):
5969 if _DecideSelfPromotion(self):
5970 self.op.master_candidate = True
5971 self.LogInfo("Auto-promoting node to master candidate")
5973 # If we're no longer master capable, we'll demote ourselves from MC
5974 if self.op.master_capable == False and node.master_candidate:
5975 self.LogInfo("Demoting from master candidate")
5976 self.op.master_candidate = False
5979 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5980 if self.op.master_candidate:
5981 new_role = self._ROLE_CANDIDATE
5982 elif self.op.drained:
5983 new_role = self._ROLE_DRAINED
5984 elif self.op.offline:
5985 new_role = self._ROLE_OFFLINE
5986 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5987 # False is still in new flags, which means we're un-setting (the
5989 new_role = self._ROLE_REGULAR
5990 else: # no new flags, nothing, keep old role
5991 new_role = old_role
5993 self.new_role = new_role
5995 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5996 # Trying to transition out of offline status
5997 result = self.rpc.call_version([node.name])[node.name]
5999 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6000 " to report its version: %s" %
6001 (node.name, result.fail_msg),
6002 errors.ECODE_ENVIRON)
6003 else:
6004 self.LogWarning("Transitioning node from offline to online state"
6005 " without using re-add. Please make sure the node"
6006 " is healthy!")
6008 # When changing the secondary ip, verify if this is a single-homed to
6009 # multi-homed transition or vice versa, and apply the relevant
6011 if self.op.secondary_ip:
6012 # Ok even without locking, because this can't be changed by any LU
6013 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6014 master_singlehomed = master.secondary_ip == master.primary_ip
6015 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6016 if self.op.force and node.name == master.name:
6017 self.LogWarning("Transitioning from single-homed to multi-homed"
6018 " cluster. All nodes will require a secondary ip.")
6020 raise errors.OpPrereqError("Changing the secondary ip on a"
6021 " single-homed cluster requires the"
6022 " --force option to be passed, and the"
6023 " target node to be the master",
6025 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6026 if self.op.force and node.name == master.name:
6027 self.LogWarning("Transitioning from multi-homed to single-homed"
6028 " cluster. Secondary IPs will have to be removed.")
6030 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6031 " same as the primary IP on a multi-homed"
6032 " cluster, unless the --force option is"
6033 " passed, and the target node is the"
6034 " master", errors.ECODE_INVAL)
6036 assert not (frozenset(affected_instances) -
6037 self.owned_locks(locking.LEVEL_INSTANCE))
6039 if node.offline:
6040 if affected_instances:
6041 raise errors.OpPrereqError("Cannot change secondary IP address:"
6042 " offline node has instances (%s)"
6043 " configured to use it" %
6044 utils.CommaJoin(affected_instances.keys()))
6045 else:
6046 # On online nodes, check that no instances are running, and that
6047 # the node has the new ip and we can reach it.
6048 for instance in affected_instances.values():
6049 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6050 msg="cannot change secondary ip")
6052 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6053 if master.name != node.name:
6054 # check reachability from master secondary ip to new secondary ip
6055 if not netutils.TcpPing(self.op.secondary_ip,
6056 constants.DEFAULT_NODED_PORT,
6057 source=master.secondary_ip):
6058 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6059 " based ping to node daemon port",
6060 errors.ECODE_ENVIRON)
6062 if self.op.ndparams:
6063 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6064 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6065 self.new_ndparams = new_ndparams
6067 if self.op.hv_state:
6068 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6069 self.node.hv_state_static)
6071 if self.op.disk_state:
6072 self.new_disk_state = \
6073 _MergeAndVerifyDiskState(self.op.disk_state,
6074 self.node.disk_state_static)
6076 def Exec(self, feedback_fn):
6081 old_role = self.old_role
6082 new_role = self.new_role
6086 if self.op.ndparams:
6087 node.ndparams = self.new_ndparams
6089 if self.op.powered is not None:
6090 node.powered = self.op.powered
6092 if self.op.hv_state:
6093 node.hv_state_static = self.new_hv_state
6095 if self.op.disk_state:
6096 node.disk_state_static = self.new_disk_state
6098 for attr in ["master_capable", "vm_capable"]:
6099 val = getattr(self.op, attr)
6101 setattr(node, attr, val)
6102 result.append((attr, str(val)))
6104 if new_role != old_role:
6105 # Tell the node to demote itself, if no longer MC and not offline
6106 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6107 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6109 self.LogWarning("Node failed to demote itself: %s", msg)
6111 new_flags = self._R2F[new_role]
6112 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6114 result.append((desc, str(nf)))
6115 (node.master_candidate, node.drained, node.offline) = new_flags
6117 # we locked all nodes, we adjust the CP before updating this node
6119 _AdjustCandidatePool(self, [node.name])
6121 if self.op.secondary_ip:
6122 node.secondary_ip = self.op.secondary_ip
6123 result.append(("secondary_ip", self.op.secondary_ip))
6125 # this will trigger configuration file update, if needed
6126 self.cfg.Update(node, feedback_fn)
6128 # this will trigger job queue propagation or cleanup if the mc
6130 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6131 self.context.ReaddNode(node)
6136 class LUNodePowercycle(NoHooksLU):
6137 """Powercycles a node.
6142 def CheckArguments(self):
6143 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6144 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6145 raise errors.OpPrereqError("The node is the master and the force"
6146 " parameter was not set",
6149 def ExpandNames(self):
6150 """Locking for PowercycleNode.
6152 This is a last-resort option and shouldn't block on other
6153 jobs. Therefore, we grab no locks.
6156 self.needed_locks = {}
6158 def Exec(self, feedback_fn):
6162 result = self.rpc.call_node_powercycle(self.op.node_name,
6163 self.cfg.GetHypervisorType())
6164 result.Raise("Failed to schedule the reboot")
6165 return result.payload
6168 class LUClusterQuery(NoHooksLU):
6169 """Query cluster configuration.
6174 def ExpandNames(self):
6175 self.needed_locks = {}
6177 def Exec(self, feedback_fn):
6178 """Return cluster config.
6181 cluster = self.cfg.GetClusterInfo()
6184 # Filter just for enabled hypervisors
6185 for os_name, hv_dict in cluster.os_hvp.items():
6186 os_hvp[os_name] = {}
6187 for hv_name, hv_params in hv_dict.items():
6188 if hv_name in cluster.enabled_hypervisors:
6189 os_hvp[os_name][hv_name] = hv_params
6191 # Convert ip_family to ip_version
6192 primary_ip_version = constants.IP4_VERSION
6193 if cluster.primary_ip_family == netutils.IP6Address.family:
6194 primary_ip_version = constants.IP6_VERSION
6197 "software_version": constants.RELEASE_VERSION,
6198 "protocol_version": constants.PROTOCOL_VERSION,
6199 "config_version": constants.CONFIG_VERSION,
6200 "os_api_version": max(constants.OS_API_VERSIONS),
6201 "export_version": constants.EXPORT_VERSION,
6202 "architecture": runtime.GetArchInfo(),
6203 "name": cluster.cluster_name,
6204 "master": cluster.master_node,
6205 "default_hypervisor": cluster.primary_hypervisor,
6206 "enabled_hypervisors": cluster.enabled_hypervisors,
6207 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6208 for hypervisor_name in cluster.enabled_hypervisors]),
6210 "beparams": cluster.beparams,
6211 "osparams": cluster.osparams,
6212 "ipolicy": cluster.ipolicy,
6213 "nicparams": cluster.nicparams,
6214 "ndparams": cluster.ndparams,
6215 "diskparams": cluster.diskparams,
6216 "candidate_pool_size": cluster.candidate_pool_size,
6217 "master_netdev": cluster.master_netdev,
6218 "master_netmask": cluster.master_netmask,
6219 "use_external_mip_script": cluster.use_external_mip_script,
6220 "volume_group_name": cluster.volume_group_name,
6221 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6222 "file_storage_dir": cluster.file_storage_dir,
6223 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6224 "maintain_node_health": cluster.maintain_node_health,
6225 "ctime": cluster.ctime,
6226 "mtime": cluster.mtime,
6227 "uuid": cluster.uuid,
6228 "tags": list(cluster.GetTags()),
6229 "uid_pool": cluster.uid_pool,
6230 "default_iallocator": cluster.default_iallocator,
6231 "reserved_lvs": cluster.reserved_lvs,
6232 "primary_ip_version": primary_ip_version,
6233 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6234 "hidden_os": cluster.hidden_os,
6235 "blacklisted_os": cluster.blacklisted_os,
6241 class LUClusterConfigQuery(NoHooksLU):
6242 """Return configuration values.
6247 def CheckArguments(self):
6248 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6250 def ExpandNames(self):
6251 self.cq.ExpandNames(self)
6253 def DeclareLocks(self, level):
6254 self.cq.DeclareLocks(self, level)
6256 def Exec(self, feedback_fn):
6257 result = self.cq.OldStyleQuery(self)
6259 assert len(result) == 1
6264 class _ClusterQuery(_QueryBase):
6265 FIELDS = query.CLUSTER_FIELDS
6267 #: Do not sort (there is only one item)
6270 def ExpandNames(self, lu):
6271 lu.needed_locks = {}
6273 # The following variables interact with _QueryBase._GetNames
6274 self.wanted = locking.ALL_SET
6275 self.do_locking = self.use_locking
6278 raise errors.OpPrereqError("Can not use locking for cluster queries",
6281 def DeclareLocks(self, lu, level):
6284 def _GetQueryData(self, lu):
6285 """Computes the list of nodes and their attributes.
6288 # Locking is not used
6289 assert not (compat.any(lu.glm.is_owned(level)
6290 for level in locking.LEVELS
6291 if level != locking.LEVEL_CLUSTER) or
6292 self.do_locking or self.use_locking)
6294 if query.CQ_CONFIG in self.requested_data:
6295 cluster = lu.cfg.GetClusterInfo()
6297 cluster = NotImplemented
6299 if query.CQ_QUEUE_DRAINED in self.requested_data:
6300 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6302 drain_flag = NotImplemented
6304 if query.CQ_WATCHER_PAUSE in self.requested_data:
6305 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6307 watcher_pause = NotImplemented
6309 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
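# Fields that were not requested are returned as NotImplemented instead of
# being computed, so only the data actually asked for is gathered here.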
6312 class LUInstanceActivateDisks(NoHooksLU):
6313 """Bring up an instance's disks.
6318 def ExpandNames(self):
6319 self._ExpandAndLockInstance()
6320 self.needed_locks[locking.LEVEL_NODE] = []
6321 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6323 def DeclareLocks(self, level):
6324 if level == locking.LEVEL_NODE:
6325 self._LockInstancesNodes()
6327 def CheckPrereq(self):
6328 """Check prerequisites.
6330 This checks that the instance is in the cluster.
6333 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6334 assert self.instance is not None, \
6335 "Cannot retrieve locked instance %s" % self.op.instance_name
6336 _CheckNodeOnline(self, self.instance.primary_node)
6338 def Exec(self, feedback_fn):
6339 """Activate the disks.
6342 disks_ok, disks_info = \
6343 _AssembleInstanceDisks(self, self.instance,
6344 ignore_size=self.op.ignore_size)
6346 raise errors.OpExecError("Cannot activate block devices")
6348 if self.op.wait_for_sync:
6349 if not _WaitForSync(self, self.instance):
6350 raise errors.OpExecError("Some disks of the instance are degraded!")
6355 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6357 """Prepare the block devices for an instance.
6359 This sets up the block devices on all nodes.
6361 @type lu: L{LogicalUnit}
6362 @param lu: the logical unit on whose behalf we execute
6363 @type instance: L{objects.Instance}
6364 @param instance: the instance for whose disks we assemble
6365 @type disks: list of L{objects.Disk} or None
6366 @param disks: which disks to assemble (or all, if None)
6367 @type ignore_secondaries: boolean
6368 @param ignore_secondaries: if true, errors on secondary nodes
6369 won't result in an error return from the function
6370 @type ignore_size: boolean
6371 @param ignore_size: if true, the current known size of the disk
6372 will not be used during the disk activation, useful for cases
6373 when the size is wrong
6374 @return: False if the operation failed, otherwise a list of
6375 (host, instance_visible_name, node_visible_name)
6376 with the mapping from node devices to instance devices
6381 iname = instance.name
6382 disks = _ExpandCheckDisks(instance, disks)
6384 # With the two passes mechanism we try to reduce the window of
6385 # opportunity for the race condition of switching DRBD to primary
6386 # before handshaking occurred, but we do not eliminate it
6388 # The proper fix would be to wait (with some limits) until the
6389 # connection has been made and drbd transitions from WFConnection
6390 # into any other network-connected state (Connected, SyncTarget,
6393 # 1st pass, assemble on all nodes in secondary mode
6394 for idx, inst_disk in enumerate(disks):
6395 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6397 node_disk = node_disk.Copy()
6398 node_disk.UnsetSize()
6399 lu.cfg.SetDiskID(node_disk, node)
6400 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6402 msg = result.fail_msg
6404 is_offline_secondary = (node in instance.secondary_nodes and
6406 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6407 " (is_primary=False, pass=1): %s",
6408 inst_disk.iv_name, node, msg)
6409 if not (ignore_secondaries or is_offline_secondary):
6412 # FIXME: race condition on drbd migration to primary
6414 # 2nd pass, do only the primary node
6415 for idx, inst_disk in enumerate(disks):
6418 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6419 if node != instance.primary_node:
6422 node_disk = node_disk.Copy()
6423 node_disk.UnsetSize()
6424 lu.cfg.SetDiskID(node_disk, node)
6425 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6427 msg = result.fail_msg
6429 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6430 " (is_primary=True, pass=2): %s",
6431 inst_disk.iv_name, node, msg)
6434 dev_path = result.payload
6436 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6438 # leave the disks configured for the primary node
6439 # this is a workaround that would be fixed better by
6440 # improving the logical/physical id handling
6442 lu.cfg.SetDiskID(disk, instance.primary_node)
6444 return disks_ok, device_info
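# Minimal usage sketch (mirroring LUInstanceActivateDisks.Exec above):
#
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#
# On success, disks_info holds (node, iv_name, device_path) tuples
# describing how each disk is visible on the primary node.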
6447 def _StartInstanceDisks(lu, instance, force):
6448 """Start the disks of an instance.
6451 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6452 ignore_secondaries=force)
6454 _ShutdownInstanceDisks(lu, instance)
6455 if force is not None and not force:
6456 lu.proc.LogWarning("", hint="If the message above refers to a"
6458 " you can retry the operation using '--force'.")
6459 raise errors.OpExecError("Disk consistency error")
6462 class LUInstanceDeactivateDisks(NoHooksLU):
6463 """Shutdown an instance's disks.
6468 def ExpandNames(self):
6469 self._ExpandAndLockInstance()
6470 self.needed_locks[locking.LEVEL_NODE] = []
6471 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6473 def DeclareLocks(self, level):
6474 if level == locking.LEVEL_NODE:
6475 self._LockInstancesNodes()
6477 def CheckPrereq(self):
6478 """Check prerequisites.
6480 This checks that the instance is in the cluster.
6483 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6484 assert self.instance is not None, \
6485 "Cannot retrieve locked instance %s" % self.op.instance_name
6487 def Exec(self, feedback_fn):
6488 """Deactivate the disks
6491 instance = self.instance
6493 _ShutdownInstanceDisks(self, instance)
6495 _SafeShutdownInstanceDisks(self, instance)
6498 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6499 """Shutdown block devices of an instance.
6501 This function checks if an instance is running, before calling
6502 _ShutdownInstanceDisks.
6505 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6506 _ShutdownInstanceDisks(lu, instance, disks=disks)
6509 def _ExpandCheckDisks(instance, disks):
6510 """Return the instance disks selected by the disks list
6512 @type disks: list of L{objects.Disk} or None
6513 @param disks: selected disks
6514 @rtype: list of L{objects.Disk}
6515 @return: selected instance disks to act on
6519 return instance.disks
6521 if not set(disks).issubset(instance.disks):
6522 raise errors.ProgrammerError("Can only act on disks belonging to the"
6527 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6528 """Shutdown block devices of an instance.
6530 This does the shutdown on all nodes of the instance.
6532 If the ignore_primary is false, errors on the primary node are
6537 disks = _ExpandCheckDisks(instance, disks)
6540 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6541 lu.cfg.SetDiskID(top_disk, node)
6542 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6543 msg = result.fail_msg
6545 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6546 disk.iv_name, node, msg)
6547 if ((node == instance.primary_node and not ignore_primary) or
6548 (node != instance.primary_node and not result.offline)):
6553 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6554 """Checks if a node has enough free memory.
6556 This function checks if a given node has the needed amount of free
6557 memory. In case the node has less memory or we cannot get the
6558 information from the node, this function raises an OpPrereqError
6561 @type lu: C{LogicalUnit}
6562 @param lu: a logical unit from which we get configuration data
6564 @param node: the node to check
6565 @type reason: C{str}
6566 @param reason: string to use in the error message
6567 @type requested: C{int}
6568 @param requested: the amount of memory in MiB to check for
6569 @type hypervisor_name: C{str}
6570 @param hypervisor_name: the hypervisor to ask for memory stats
6572 @return: node current free memory
6573 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6574 we cannot check the node
6577 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6578 nodeinfo[node].Raise("Can't get data from node %s" % node,
6579 prereq=True, ecode=errors.ECODE_ENVIRON)
6580 (_, _, (hv_info, )) = nodeinfo[node].payload
6582 free_mem = hv_info.get("memory_free", None)
6583 if not isinstance(free_mem, int):
6584 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6585 " was '%s'" % (node, free_mem),
6586 errors.ECODE_ENVIRON)
6587 if requested > free_mem:
6588 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6589 " needed %s MiB, available %s MiB" %
6590 (node, reason, requested, free_mem),
6591 errors.ECODE_NORES)
6592 return free_mem
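# Typical call, as used by LUInstanceStartup.CheckPrereq further below:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)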
6595 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6596 """Checks if nodes have enough free disk space in the all VGs.
6598 This function check if all given nodes have the needed amount of
6599 free disk. In case any node has less disk or we cannot get the
6600 information from the node, this function raise an OpPrereqError
6603 @type lu: C{LogicalUnit}
6604 @param lu: a logical unit from which we get configuration data
6605 @type nodenames: C{list}
6606 @param nodenames: the list of node names to check
6607 @type req_sizes: C{dict}
6608 @param req_sizes: the hash of vg and corresponding amount of disk in
6610 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6611 or we cannot check the node
6614 for vg, req_size in req_sizes.items():
6615 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
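# Example (hypothetical values): req_sizes = {"xenvg": 2048} asks for a
# check that every node in nodenames has at least 2048 MiB free in the
# "xenvg" volume group; each entry is checked via _CheckNodesFreeDiskOnVG.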
6618 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6619 """Checks if nodes have enough free disk space in the specified VG.
6621 This function checks if all given nodes have the needed amount of
6622 free disk. In case any node has less disk or we cannot get the
6623 information from the node, this function raises an OpPrereqError
6626 @type lu: C{LogicalUnit}
6627 @param lu: a logical unit from which we get configuration data
6628 @type nodenames: C{list}
6629 @param nodenames: the list of node names to check
6631 @param vg: the volume group to check
6632 @type requested: C{int}
6633 @param requested: the amount of disk in MiB to check for
6634 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6635 or we cannot check the node
6638 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6639 for node in nodenames:
6640 info = nodeinfo[node]
6641 info.Raise("Cannot get current information from node %s" % node,
6642 prereq=True, ecode=errors.ECODE_ENVIRON)
6643 (_, (vg_info, ), _) = info.payload
6644 vg_free = vg_info.get("vg_free", None)
6645 if not isinstance(vg_free, int):
6646 raise errors.OpPrereqError("Can't compute free disk space on node"
6647 " %s for vg %s, result was '%s'" %
6648 (node, vg, vg_free), errors.ECODE_ENVIRON)
6649 if requested > vg_free:
6650 raise errors.OpPrereqError("Not enough disk space on target node %s"
6651 " vg %s: required %d MiB, available %d MiB" %
6652 (node, vg, requested, vg_free),
6653 errors.ECODE_NORES)
6656 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6657 """Checks if nodes have enough physical CPUs
6659 This function checks if all given nodes have the needed number of
6660 physical CPUs. In case any node has less CPUs or we cannot get the
6661 information from the node, this function raises an OpPrereqError
6664 @type lu: C{LogicalUnit}
6665 @param lu: a logical unit from which we get configuration data
6666 @type nodenames: C{list}
6667 @param nodenames: the list of node names to check
6668 @type requested: C{int}
6669 @param requested: the minimum acceptable number of physical CPUs
6670 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6671 or we cannot check the node
6674 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6675 for node in nodenames:
6676 info = nodeinfo[node]
6677 info.Raise("Cannot get current information from node %s" % node,
6678 prereq=True, ecode=errors.ECODE_ENVIRON)
6679 (_, _, (hv_info, )) = info.payload
6680 num_cpus = hv_info.get("cpu_total", None)
6681 if not isinstance(num_cpus, int):
6682 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6683 " on node %s, result was '%s'" %
6684 (node, num_cpus), errors.ECODE_ENVIRON)
6685 if requested > num_cpus:
6686 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6687 "required" % (node, num_cpus, requested),
6691 class LUInstanceStartup(LogicalUnit):
6692 """Starts an instance.
6695 HPATH = "instance-start"
6696 HTYPE = constants.HTYPE_INSTANCE
6699 def CheckArguments(self):
6701 if self.op.beparams:
6702 # fill the beparams dict
6703 objects.UpgradeBeParams(self.op.beparams)
6704 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6706 def ExpandNames(self):
6707 self._ExpandAndLockInstance()
6708 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6710 def DeclareLocks(self, level):
6711 if level == locking.LEVEL_NODE_RES:
6712 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6714 def BuildHooksEnv(self):
6717 This runs on master, primary and secondary nodes of the instance.
6721 "FORCE": self.op.force,
6724 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6728 def BuildHooksNodes(self):
6729 """Build hooks nodes.
6732 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6735 def CheckPrereq(self):
6736 """Check prerequisites.
6738 This checks that the instance is in the cluster.
6741 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6742 assert self.instance is not None, \
6743 "Cannot retrieve locked instance %s" % self.op.instance_name
6746 if self.op.hvparams:
6747 # check hypervisor parameter syntax (locally)
6748 cluster = self.cfg.GetClusterInfo()
6749 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6750 filled_hvp = cluster.FillHV(instance)
6751 filled_hvp.update(self.op.hvparams)
6752 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6753 hv_type.CheckParameterSyntax(filled_hvp)
6754 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6756 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6758 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6760 if self.primary_offline and self.op.ignore_offline_nodes:
6761 self.proc.LogWarning("Ignoring offline primary node")
6763 if self.op.hvparams or self.op.beparams:
6764 self.proc.LogWarning("Overridden parameters are ignored")
6766 _CheckNodeOnline(self, instance.primary_node)
6768 bep = self.cfg.GetClusterInfo().FillBE(instance)
6769 bep.update(self.op.beparams)
6771 # check bridges existence
6772 _CheckInstanceBridgesExist(self, instance)
6774 remote_info = self.rpc.call_instance_info(instance.primary_node,
6776 instance.hypervisor)
6777 remote_info.Raise("Error checking node %s" % instance.primary_node,
6778 prereq=True, ecode=errors.ECODE_ENVIRON)
6779 if not remote_info.payload: # not running already
6780 _CheckNodeFreeMemory(self, instance.primary_node,
6781 "starting instance %s" % instance.name,
6782 bep[constants.BE_MINMEM], instance.hypervisor)
6784 def Exec(self, feedback_fn):
6785 """Start the instance.
6788 instance = self.instance
6789 force = self.op.force
6791 if not self.op.no_remember:
6792 self.cfg.MarkInstanceUp(instance.name)
6794 if self.primary_offline:
6795 assert self.op.ignore_offline_nodes
6796 self.proc.LogInfo("Primary node offline, marked instance as started")
6798 node_current = instance.primary_node
6800 _StartInstanceDisks(self, instance, force)
6803 self.rpc.call_instance_start(node_current,
6804 (instance, self.op.hvparams,
6806 self.op.startup_paused)
6807 msg = result.fail_msg
6809 _ShutdownInstanceDisks(self, instance)
6810 raise errors.OpExecError("Could not start instance: %s" % msg)
6813 class LUInstanceReboot(LogicalUnit):
6814 """Reboot an instance.
6817 HPATH = "instance-reboot"
6818 HTYPE = constants.HTYPE_INSTANCE
6821 def ExpandNames(self):
6822 self._ExpandAndLockInstance()
6824 def BuildHooksEnv(self):
6827 This runs on master, primary and secondary nodes of the instance.
6831 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6832 "REBOOT_TYPE": self.op.reboot_type,
6833 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6836 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6840 def BuildHooksNodes(self):
6841 """Build hooks nodes.
6844 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6847 def CheckPrereq(self):
6848 """Check prerequisites.
6850 This checks that the instance is in the cluster.
6853 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6854 assert self.instance is not None, \
6855 "Cannot retrieve locked instance %s" % self.op.instance_name
6856 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6857 _CheckNodeOnline(self, instance.primary_node)
6859 # check bridges existence
6860 _CheckInstanceBridgesExist(self, instance)
6862 def Exec(self, feedback_fn):
6863 """Reboot the instance.
6866 instance = self.instance
6867 ignore_secondaries = self.op.ignore_secondaries
6868 reboot_type = self.op.reboot_type
6870 remote_info = self.rpc.call_instance_info(instance.primary_node,
6872 instance.hypervisor)
6873 remote_info.Raise("Error checking node %s" % instance.primary_node)
6874 instance_running = bool(remote_info.payload)
6876 node_current = instance.primary_node
6878 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6879 constants.INSTANCE_REBOOT_HARD]:
6880 for disk in instance.disks:
6881 self.cfg.SetDiskID(disk, node_current)
6882 result = self.rpc.call_instance_reboot(node_current, instance,
6884 self.op.shutdown_timeout)
6885 result.Raise("Could not reboot instance")
6887 if instance_running:
6888 result = self.rpc.call_instance_shutdown(node_current, instance,
6889 self.op.shutdown_timeout)
6890 result.Raise("Could not shutdown instance for full reboot")
6891 _ShutdownInstanceDisks(self, instance)
6893 self.LogInfo("Instance %s was already stopped, starting now",
6895 _StartInstanceDisks(self, instance, ignore_secondaries)
6896 result = self.rpc.call_instance_start(node_current,
6897 (instance, None, None), False)
6898 msg = result.fail_msg
6900 _ShutdownInstanceDisks(self, instance)
6901 raise errors.OpExecError("Could not start instance for"
6902 " full reboot: %s" % msg)
6904 self.cfg.MarkInstanceUp(instance.name)
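# To summarize the two paths above: soft/hard reboots are delegated to the
# hypervisor via call_instance_reboot, while the "full" variant shuts the
# instance and its disks down and then starts everything again from here.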
6907 class LUInstanceShutdown(LogicalUnit):
6908 """Shutdown an instance.
6911 HPATH = "instance-stop"
6912 HTYPE = constants.HTYPE_INSTANCE
6915 def ExpandNames(self):
6916 self._ExpandAndLockInstance()
6918 def BuildHooksEnv(self):
6921 This runs on master, primary and secondary nodes of the instance.
6924 env = _BuildInstanceHookEnvByObject(self, self.instance)
6925 env["TIMEOUT"] = self.op.timeout
6928 def BuildHooksNodes(self):
6929 """Build hooks nodes.
6932 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6935 def CheckPrereq(self):
6936 """Check prerequisites.
6938 This checks that the instance is in the cluster.
6941 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6942 assert self.instance is not None, \
6943 "Cannot retrieve locked instance %s" % self.op.instance_name
6945 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6947 self.primary_offline = \
6948 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6950 if self.primary_offline and self.op.ignore_offline_nodes:
6951 self.proc.LogWarning("Ignoring offline primary node")
6953 _CheckNodeOnline(self, self.instance.primary_node)
6955 def Exec(self, feedback_fn):
6956 """Shutdown the instance.
6959 instance = self.instance
6960 node_current = instance.primary_node
6961 timeout = self.op.timeout
6963 if not self.op.no_remember:
6964 self.cfg.MarkInstanceDown(instance.name)
6966 if self.primary_offline:
6967 assert self.op.ignore_offline_nodes
6968 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6970 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6971 msg = result.fail_msg
6973 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6975 _ShutdownInstanceDisks(self, instance)
6978 class LUInstanceReinstall(LogicalUnit):
6979 """Reinstall an instance.
6982 HPATH = "instance-reinstall"
6983 HTYPE = constants.HTYPE_INSTANCE
6986 def ExpandNames(self):
6987 self._ExpandAndLockInstance()
6989 def BuildHooksEnv(self):
6992 This runs on master, primary and secondary nodes of the instance.
6995 return _BuildInstanceHookEnvByObject(self, self.instance)
6997 def BuildHooksNodes(self):
6998 """Build hooks nodes.
7001 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7004 def CheckPrereq(self):
7005 """Check prerequisites.
7007 This checks that the instance is in the cluster and is not running.
7010 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7011 assert instance is not None, \
7012 "Cannot retrieve locked instance %s" % self.op.instance_name
7013 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7014 " offline, cannot reinstall")
7016 if instance.disk_template == constants.DT_DISKLESS:
7017 raise errors.OpPrereqError("Instance '%s' has no disks" %
7018 self.op.instance_name,
7020 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7022 if self.op.os_type is not None:
7024 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7025 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7026 instance_os = self.op.os_type
7028 instance_os = instance.os
7030 nodelist = list(instance.all_nodes)
7032 if self.op.osparams:
7033 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7034 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7035 self.os_inst = i_osdict # the new dict (without defaults)
7036 else:
7037 self.os_inst = {}
7039 self.instance = instance
7041 def Exec(self, feedback_fn):
7042 """Reinstall the instance.
7045 inst = self.instance
7047 if self.op.os_type is not None:
7048 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7049 inst.os = self.op.os_type
7050 # Write to configuration
7051 self.cfg.Update(inst, feedback_fn)
7053 _StartInstanceDisks(self, inst, None)
7055 feedback_fn("Running the instance OS create scripts...")
7056 # FIXME: pass debug option from opcode to backend
7057 result = self.rpc.call_instance_os_add(inst.primary_node,
7058 (inst, self.os_inst), True,
7059 self.op.debug_level)
7060 result.Raise("Could not install OS for instance %s on node %s" %
7061 (inst.name, inst.primary_node))
7063 _ShutdownInstanceDisks(self, inst)
7066 class LUInstanceRecreateDisks(LogicalUnit):
7067 """Recreate an instance's missing disks.
7070 HPATH = "instance-recreate-disks"
7071 HTYPE = constants.HTYPE_INSTANCE
7074 _MODIFYABLE = frozenset([
7075 constants.IDISK_SIZE,
7076 constants.IDISK_MODE,
7079 # New or changed disk parameters may have different semantics
7080 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7081 constants.IDISK_ADOPT,
7083 # TODO: Implement support changing VG while recreating
7085 constants.IDISK_METAVG,
7088 def _RunAllocator(self):
7089 """Run the allocator based on input opcode.
7092 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7095 # The allocator should actually run in "relocate" mode, but current
7096 # allocators don't support relocating all the nodes of an instance at
7097 # the same time. As a workaround we use "allocate" mode, but this is
7098 # suboptimal for two reasons:
7099 # - The instance name passed to the allocator is present in the list of
7100 # existing instances, so there could be a conflict within the
7101 # internal structures of the allocator. This doesn't happen with the
7102 # current allocators, but it's a liability.
7103 # - The allocator counts the resources used by the instance twice: once
7104 # because the instance exists already, and once because it tries to
7105 # allocate a new instance.
7106 # The allocator could choose some of the nodes on which the instance is
7107 # running, but that's not a problem. If the instance nodes are broken,
7108 # they should already be marked as drained or offline, and hence
7109 # skipped by the allocator. If instance disks have been lost for other
7110 # reasons, then recreating the disks on the same nodes should be fine.
7111 ial = IAllocator(self.cfg, self.rpc,
7112 mode=constants.IALLOCATOR_MODE_ALLOC,
7113 name=self.op.instance_name,
7114 disk_template=self.instance.disk_template,
7115 tags=list(self.instance.GetTags()),
7116 os=self.instance.os,
7118 vcpus=be_full[constants.BE_VCPUS],
7119 memory=be_full[constants.BE_MAXMEM],
7120 spindle_use=be_full[constants.BE_SPINDLE_USE],
7121 disks=[{constants.IDISK_SIZE: d.size,
7122 constants.IDISK_MODE: d.mode}
7123 for d in self.instance.disks],
7124 hypervisor=self.instance.hypervisor)
7126 assert ial.required_nodes == len(self.instance.all_nodes)
7128 ial.Run(self.op.iallocator)
7131 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7132 " %s" % (self.op.iallocator, ial.info),
7135 if len(ial.result) != ial.required_nodes:
7136 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7137 " of nodes (%s), required %s" %
7138 (self.op.iallocator, len(ial.result),
7139 ial.required_nodes), errors.ECODE_FAULT)
7141 self.op.nodes = ial.result
7142 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7143 self.op.instance_name, self.op.iallocator,
7144 utils.CommaJoin(ial.result))
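# The allocator's choice is written back into self.op.nodes above, so the
# remainder of the LU can treat the iallocator and explicit-nodes cases
# uniformly.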
7146 def CheckArguments(self):
7147 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7148 # Normalize and convert deprecated list of disk indices
7149 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7151 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7153 raise errors.OpPrereqError("Some disks have been specified more than"
7154 " once: %s" % utils.CommaJoin(duplicates),
7157 if self.op.iallocator and self.op.nodes:
7158 raise errors.OpPrereqError("Give either the iallocator or the new"
7159 " nodes, not both", errors.ECODE_INVAL)
7161 for (idx, params) in self.op.disks:
7162 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7163 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7165 raise errors.OpPrereqError("Parameters for disk %s try to change"
7166 " unmodifyable parameter(s): %s" %
7167 (idx, utils.CommaJoin(unsupported)),
7170 def ExpandNames(self):
7171 self._ExpandAndLockInstance()
7172 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7174 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7175 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7177 self.needed_locks[locking.LEVEL_NODE] = []
7178 if self.op.iallocator:
7179 # iallocator will select a new node in the same group
7180 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7181 self.needed_locks[locking.LEVEL_NODE_RES] = []
7183 def DeclareLocks(self, level):
7184 if level == locking.LEVEL_NODEGROUP:
7185 assert self.op.iallocator is not None
7186 assert not self.op.nodes
7187 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7188 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7189 # Lock the primary group used by the instance optimistically; this
7190 # requires going via the node before it's locked, requiring
7191 # verification later on
7192 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7193 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7195 elif level == locking.LEVEL_NODE:
7196 # If an allocator is used, then we lock all the nodes in the current
7197 # instance group, as we don't know yet which ones will be selected;
7198 # if we replace the nodes without using an allocator, we only need to
7199 # lock the old primary for doing RPCs (FIXME: we don't lock nodes for
7200 # RPC anymore), otherwise we need to lock all the instance nodes for
7202 if self.op.iallocator:
7203 assert not self.op.nodes
7204 assert not self.needed_locks[locking.LEVEL_NODE]
7205 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7207 # Lock member nodes of the group of the primary node
7208 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7209 self.needed_locks[locking.LEVEL_NODE].extend(
7210 self.cfg.GetNodeGroup(group_uuid).members)
7212 primary_only = bool(self.op.nodes)
7213 self._LockInstancesNodes(primary_only=primary_only)
7214 elif level == locking.LEVEL_NODE_RES:
7216 self.needed_locks[locking.LEVEL_NODE_RES] = \
7217 self.needed_locks[locking.LEVEL_NODE][:]
7219 def BuildHooksEnv(self):
7222 This runs on master, primary and secondary nodes of the instance.
7225 return _BuildInstanceHookEnvByObject(self, self.instance)
7227 def BuildHooksNodes(self):
7228 """Build hooks nodes.
7231 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7234 def CheckPrereq(self):
7235 """Check prerequisites.
7237 This checks that the instance is in the cluster and is not running.
7240 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7241 assert instance is not None, \
7242 "Cannot retrieve locked instance %s" % self.op.instance_name
7244 if len(self.op.nodes) != len(instance.all_nodes):
7245 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7246 " %d replacement nodes were specified" %
7247 (instance.name, len(instance.all_nodes),
7248 len(self.op.nodes)),
7250 assert instance.disk_template != constants.DT_DRBD8 or \
7251 len(self.op.nodes) == 2
7252 assert instance.disk_template != constants.DT_PLAIN or \
7253 len(self.op.nodes) == 1
7254 primary_node = self.op.nodes[0]
7256 primary_node = instance.primary_node
7257 if not self.op.iallocator:
7258 _CheckNodeOnline(self, primary_node)
7260 if instance.disk_template == constants.DT_DISKLESS:
7261 raise errors.OpPrereqError("Instance '%s' has no disks" %
7262 self.op.instance_name, errors.ECODE_INVAL)
7264 # Verify if node group locks are still correct
7265 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7267 # Node group locks are acquired only for the primary node (and only
7268 # when the allocator is used)
7269 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7272 # if we replace nodes *and* the old primary is offline, we don't
7274 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7275 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7276 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7277 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7278 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7279 msg="cannot recreate disks")
7282 self.disks = dict(self.op.disks)
7284 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7286 maxidx = max(self.disks.keys())
7287 if maxidx >= len(instance.disks):
7288 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7291 if ((self.op.nodes or self.op.iallocator) and
7292 sorted(self.disks.keys()) != range(len(instance.disks))):
7293 raise errors.OpPrereqError("Can't recreate disks partially and"
7294 " change the nodes at the same time",
7297 self.instance = instance
7299 if self.op.iallocator:
7300 self._RunAllocator()
7302 # Release unneeded node and node resource locks
7303 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7304 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7306 def Exec(self, feedback_fn):
7307 """Recreate the disks.
7310 instance = self.instance
7312 assert (self.owned_locks(locking.LEVEL_NODE) ==
7313 self.owned_locks(locking.LEVEL_NODE_RES))
7316 mods = [] # keeps track of needed changes
7318 for idx, disk in enumerate(instance.disks):
7320 changes = self.disks[idx]
7322 # Disk should not be recreated
7326 # update secondaries for disks, if needed
7327 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7328 # need to update the nodes and minors
7329 assert len(self.op.nodes) == 2
7330 assert len(disk.logical_id) == 6 # otherwise disk internals
7332 (_, _, old_port, _, _, old_secret) = disk.logical_id
7333 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7334 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7335 new_minors[0], new_minors[1], old_secret)
7336 assert len(disk.logical_id) == len(new_id)
7340 mods.append((idx, new_id, changes))
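# (DRBD8 logical_id layout, as unpacked above: (node_a, node_b, port,
# minor_a, minor_b, secret); only the nodes and minors are replaced here,
# the port and the shared secret are kept.)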
7342 # now that we have passed all asserts above, we can apply the mods
7343 # in a single run (to avoid partial changes)
7344 for idx, new_id, changes in mods:
7345 disk = instance.disks[idx]
7346 if new_id is not None:
7347 assert disk.dev_type == constants.LD_DRBD8
7348 disk.logical_id = new_id
7350 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7351 mode=changes.get(constants.IDISK_MODE, None))
7353 # change primary node, if needed
7355 instance.primary_node = self.op.nodes[0]
7356 self.LogWarning("Changing the instance's nodes, you will have to"
7357 " remove any disks left on the older nodes manually")
7360 self.cfg.Update(instance, feedback_fn)
7362 _CreateDisks(self, instance, to_skip=to_skip)
7365 class LUInstanceRename(LogicalUnit):
7366 """Rename an instance.
7369 HPATH = "instance-rename"
7370 HTYPE = constants.HTYPE_INSTANCE
7372 def CheckArguments(self):
7376 if self.op.ip_check and not self.op.name_check:
7377 # TODO: make the ip check more flexible and not depend on the name check
7378 raise errors.OpPrereqError("IP address check requires a name check",
7381 def BuildHooksEnv(self):
7384 This runs on master, primary and secondary nodes of the instance.
7387 env = _BuildInstanceHookEnvByObject(self, self.instance)
7388 env["INSTANCE_NEW_NAME"] = self.op.new_name
7391 def BuildHooksNodes(self):
7392 """Build hooks nodes.
7395 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7398 def CheckPrereq(self):
7399 """Check prerequisites.
7401 This checks that the instance is in the cluster and is not running.
7404 self.op.instance_name = _ExpandInstanceName(self.cfg,
7405 self.op.instance_name)
7406 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7407 assert instance is not None
7408 _CheckNodeOnline(self, instance.primary_node)
7409 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7410 msg="cannot rename")
7411 self.instance = instance
7413 new_name = self.op.new_name
7414 if self.op.name_check:
7415 hostname = netutils.GetHostname(name=new_name)
7416 if hostname.name != new_name:
7417 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7419 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7420 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7421 " same as given hostname '%s'") %
7422 (hostname.name, self.op.new_name),
7424 new_name = self.op.new_name = hostname.name
7425 if (self.op.ip_check and
7426 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7427 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7428 (hostname.ip, new_name),
7429 errors.ECODE_NOTUNIQUE)
7431 instance_list = self.cfg.GetInstanceList()
7432 if new_name in instance_list and new_name != instance.name:
7433 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7434 new_name, errors.ECODE_EXISTS)
7436 def Exec(self, feedback_fn):
7437 """Rename the instance.
7440 inst = self.instance
7441 old_name = inst.name
7443 rename_file_storage = False
7444 if (inst.disk_template in constants.DTS_FILEBASED and
7445 self.op.new_name != inst.name):
7446 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7447 rename_file_storage = True
7449 self.cfg.RenameInstance(inst.name, self.op.new_name)
7450 # Change the instance lock. This is definitely safe while we hold the BGL.
7451 # Otherwise the new lock would have to be added in acquired mode.
7453 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7454 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7456 # re-read the instance from the configuration after rename
7457 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7459 if rename_file_storage:
7460 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7461 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7462 old_file_storage_dir,
7463 new_file_storage_dir)
7464 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7465 " (but the instance has been renamed in Ganeti)" %
7466 (inst.primary_node, old_file_storage_dir,
7467 new_file_storage_dir))
7469 _StartInstanceDisks(self, inst, None)
7471 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7472 old_name, self.op.debug_level)
7473 msg = result.fail_msg
7475 msg = ("Could not run OS rename script for instance %s on node %s"
7476 " (but the instance has been renamed in Ganeti): %s" %
7477 (inst.name, inst.primary_node, msg))
7478 self.proc.LogWarning(msg)
7480 _ShutdownInstanceDisks(self, inst)
7485 class LUInstanceRemove(LogicalUnit):
7486 """Remove an instance.
7489 HPATH = "instance-remove"
7490 HTYPE = constants.HTYPE_INSTANCE
7493 def ExpandNames(self):
7494 self._ExpandAndLockInstance()
7495 self.needed_locks[locking.LEVEL_NODE] = []
7496 self.needed_locks[locking.LEVEL_NODE_RES] = []
7497 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7499 def DeclareLocks(self, level):
7500 if level == locking.LEVEL_NODE:
7501 self._LockInstancesNodes()
7502 elif level == locking.LEVEL_NODE_RES:
7504 self.needed_locks[locking.LEVEL_NODE_RES] = \
7505 self.needed_locks[locking.LEVEL_NODE][:]
7507 def BuildHooksEnv(self):
7510 This runs on master, primary and secondary nodes of the instance.
7513 env = _BuildInstanceHookEnvByObject(self, self.instance)
7514 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7517 def BuildHooksNodes(self):
7518 """Build hooks nodes.
7521 nl = [self.cfg.GetMasterNode()]
7522 nl_post = list(self.instance.all_nodes) + nl
7523 return (nl, nl_post)
7525 def CheckPrereq(self):
7526 """Check prerequisites.
7528 This checks that the instance is in the cluster.
7531 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7532 assert self.instance is not None, \
7533 "Cannot retrieve locked instance %s" % self.op.instance_name
7535 def Exec(self, feedback_fn):
7536 """Remove the instance.
7539 instance = self.instance
7540 logging.info("Shutting down instance %s on node %s",
7541 instance.name, instance.primary_node)
7543 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7544 self.op.shutdown_timeout)
7545 msg = result.fail_msg
7547 if self.op.ignore_failures:
7548 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7550 raise errors.OpExecError("Could not shutdown instance %s on"
7552 (instance.name, instance.primary_node, msg))
7554 assert (self.owned_locks(locking.LEVEL_NODE) ==
7555 self.owned_locks(locking.LEVEL_NODE_RES))
7556 assert not (set(instance.all_nodes) -
7557 self.owned_locks(locking.LEVEL_NODE)), \
7558 "Not owning correct locks"
7560 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7563 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7564 """Utility function to remove an instance.
7567 logging.info("Removing block devices for instance %s", instance.name)
7569 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7570 if not ignore_failures:
7571 raise errors.OpExecError("Can't remove instance's disks")
7572 feedback_fn("Warning: can't remove instance's disks")
7574 logging.info("Removing instance %s out of cluster config", instance.name)
7576 lu.cfg.RemoveInstance(instance.name)
7578 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7579 "Instance lock removal conflict"
7581 # Remove lock for the instance
7582 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7585 class LUInstanceQuery(NoHooksLU):
7586 """Logical unit for querying instances.
7589 # pylint: disable=W0142
7592 def CheckArguments(self):
7593 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7594 self.op.output_fields, self.op.use_locking)
7596 def ExpandNames(self):
7597 self.iq.ExpandNames(self)
7599 def DeclareLocks(self, level):
7600 self.iq.DeclareLocks(self, level)
7602 def Exec(self, feedback_fn):
7603 return self.iq.OldStyleQuery(self)
7606 class LUInstanceFailover(LogicalUnit):
7607 """Failover an instance.
7610 HPATH = "instance-failover"
7611 HTYPE = constants.HTYPE_INSTANCE
7614 def CheckArguments(self):
7615 """Check the arguments.
7618 self.iallocator = getattr(self.op, "iallocator", None)
7619 self.target_node = getattr(self.op, "target_node", None)
7621 def ExpandNames(self):
7622 self._ExpandAndLockInstance()
7624 if self.op.target_node is not None:
7625 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7627 self.needed_locks[locking.LEVEL_NODE] = []
7628 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7630 self.needed_locks[locking.LEVEL_NODE_RES] = []
7631 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7633 ignore_consistency = self.op.ignore_consistency
7634 shutdown_timeout = self.op.shutdown_timeout
7635 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7638 ignore_consistency=ignore_consistency,
7639 shutdown_timeout=shutdown_timeout,
7640 ignore_ipolicy=self.op.ignore_ipolicy)
7641 self.tasklets = [self._migrater]
7643 def DeclareLocks(self, level):
7644 if level == locking.LEVEL_NODE:
7645 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7646 if instance.disk_template in constants.DTS_EXT_MIRROR:
7647 if self.op.target_node is None:
7648 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7650 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7651 self.op.target_node]
7652 del self.recalculate_locks[locking.LEVEL_NODE]
7654 self._LockInstancesNodes()
7655 elif level == locking.LEVEL_NODE_RES:
7657 self.needed_locks[locking.LEVEL_NODE_RES] = \
7658 self.needed_locks[locking.LEVEL_NODE][:]
7660 def BuildHooksEnv(self):
7663 This runs on master, primary and secondary nodes of the instance.
7666 instance = self._migrater.instance
7667 source_node = instance.primary_node
7668 target_node = self.op.target_node
7670 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7671 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7672 "OLD_PRIMARY": source_node,
7673 "NEW_PRIMARY": target_node,
7676 if instance.disk_template in constants.DTS_INT_MIRROR:
7677 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7678 env["NEW_SECONDARY"] = source_node
7680 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7682 env.update(_BuildInstanceHookEnvByObject(self, instance))
7686 def BuildHooksNodes(self):
7687 """Build hooks nodes.
7690 instance = self._migrater.instance
7691 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7692 return (nl, nl + [instance.primary_node])
7695 class LUInstanceMigrate(LogicalUnit):
7696 """Migrate an instance.
7698 This is migration without shutting down, compared to the failover,
7699 which is done with shutdown.
7702 HPATH = "instance-migrate"
7703 HTYPE = constants.HTYPE_INSTANCE
7706 def ExpandNames(self):
7707 self._ExpandAndLockInstance()
7709 if self.op.target_node is not None:
7710 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7712 self.needed_locks[locking.LEVEL_NODE] = []
7713 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7715 self.needed_locks[locking.LEVEL_NODE] = []
7716 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7719 TLMigrateInstance(self, self.op.instance_name,
7720 cleanup=self.op.cleanup,
7722 fallback=self.op.allow_failover,
7723 allow_runtime_changes=self.op.allow_runtime_changes,
7724 ignore_ipolicy=self.op.ignore_ipolicy)
7725 self.tasklets = [self._migrater]
7727 def DeclareLocks(self, level):
7728 if level == locking.LEVEL_NODE:
7729 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7730 if instance.disk_template in constants.DTS_EXT_MIRROR:
7731 if self.op.target_node is None:
7732 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7734 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7735 self.op.target_node]
7736 del self.recalculate_locks[locking.LEVEL_NODE]
7738 self._LockInstancesNodes()
7739 elif level == locking.LEVEL_NODE_RES:
7741 self.needed_locks[locking.LEVEL_NODE_RES] = \
7742 self.needed_locks[locking.LEVEL_NODE][:]
7744 def BuildHooksEnv(self):
7747 This runs on master, primary and secondary nodes of the instance.
7750 instance = self._migrater.instance
7751 source_node = instance.primary_node
7752 target_node = self.op.target_node
7753 env = _BuildInstanceHookEnvByObject(self, instance)
7755 "MIGRATE_LIVE": self._migrater.live,
7756 "MIGRATE_CLEANUP": self.op.cleanup,
7757 "OLD_PRIMARY": source_node,
7758 "NEW_PRIMARY": target_node,
7759 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7762 if instance.disk_template in constants.DTS_INT_MIRROR:
7763 env["OLD_SECONDARY"] = target_node
7764 env["NEW_SECONDARY"] = source_node
7766 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7770 def BuildHooksNodes(self):
7771 """Build hooks nodes.
7774 instance = self._migrater.instance
7775 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7776 return (nl, nl + [instance.primary_node])
7779 class LUInstanceMove(LogicalUnit):
7780 """Move an instance by data-copying.
7783 HPATH = "instance-move"
7784 HTYPE = constants.HTYPE_INSTANCE
7787 def ExpandNames(self):
7788 self._ExpandAndLockInstance()
7789 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7790 self.op.target_node = target_node
7791 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7792 self.needed_locks[locking.LEVEL_NODE_RES] = []
7793 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7795 def DeclareLocks(self, level):
7796 if level == locking.LEVEL_NODE:
7797 self._LockInstancesNodes(primary_only=True)
7798 elif level == locking.LEVEL_NODE_RES:
7800 self.needed_locks[locking.LEVEL_NODE_RES] = \
7801 self.needed_locks[locking.LEVEL_NODE][:]
7803 def BuildHooksEnv(self):
7806 This runs on master, primary and secondary nodes of the instance.
7810 "TARGET_NODE": self.op.target_node,
7811 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7813 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7816 def BuildHooksNodes(self):
7817 """Build hooks nodes.
7821 self.cfg.GetMasterNode(),
7822 self.instance.primary_node,
7823 self.op.target_node,
7827 def CheckPrereq(self):
7828 """Check prerequisites.
7830 This checks that the instance is in the cluster.
7833 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7834 assert self.instance is not None, \
7835 "Cannot retrieve locked instance %s" % self.op.instance_name
7837 node = self.cfg.GetNodeInfo(self.op.target_node)
7838 assert node is not None, \
7839 "Cannot retrieve locked node %s" % self.op.target_node
7841 self.target_node = target_node = node.name
7843 if target_node == instance.primary_node:
7844 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7845 (instance.name, target_node),
7848 bep = self.cfg.GetClusterInfo().FillBE(instance)
7850 for idx, dsk in enumerate(instance.disks):
7851 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7852 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7853 " cannot copy" % idx, errors.ECODE_STATE)
7855 _CheckNodeOnline(self, target_node)
7856 _CheckNodeNotDrained(self, target_node)
7857 _CheckNodeVmCapable(self, target_node)
7858 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7859 self.cfg.GetNodeGroup(node.group))
7860 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7861 ignore=self.op.ignore_ipolicy)
7863 if instance.admin_state == constants.ADMINST_UP:
7864 # check memory requirements on the secondary node
7865 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7866 instance.name, bep[constants.BE_MAXMEM],
7867 instance.hypervisor)
7869 self.LogInfo("Not checking memory on the secondary node as"
7870 " instance will not be started")
7872 # check bridge existence
7873 _CheckInstanceBridgesExist(self, instance, node=target_node)
7875 def Exec(self, feedback_fn):
7876 """Move an instance.
7878 The move is done by shutting it down on its present node, copying
7879 the data over (slow) and starting it on the new node.
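In outline (a summary of the code below): the instance is shut down on the
source node, its disks are recreated on the target node, each disk is copied
over with call_blockdev_assemble/call_blockdev_export, the old disks are
removed, and the instance is started again on the target if it was marked up.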
7882 instance = self.instance
7884 source_node = instance.primary_node
7885 target_node = self.target_node
7887 self.LogInfo("Shutting down instance %s on source node %s",
7888 instance.name, source_node)
7890 assert (self.owned_locks(locking.LEVEL_NODE) ==
7891 self.owned_locks(locking.LEVEL_NODE_RES))
7893 result = self.rpc.call_instance_shutdown(source_node, instance,
7894 self.op.shutdown_timeout)
7895 msg = result.fail_msg
7897 if self.op.ignore_consistency:
7898 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7899 " Proceeding anyway. Please make sure node"
7900 " %s is down. Error details: %s",
7901 instance.name, source_node, source_node, msg)
7903 raise errors.OpExecError("Could not shutdown instance %s on"
7905 (instance.name, source_node, msg))
7907 # create the target disks
7909 _CreateDisks(self, instance, target_node=target_node)
7910 except errors.OpExecError:
7911 self.LogWarning("Device creation failed, reverting...")
7913 _RemoveDisks(self, instance, target_node=target_node)
7915 self.cfg.ReleaseDRBDMinors(instance.name)
7918 cluster_name = self.cfg.GetClusterInfo().cluster_name
7921 # activate, get path, copy the data over
7922 for idx, disk in enumerate(instance.disks):
7923 self.LogInfo("Copying data for disk %d", idx)
7924 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7925 instance.name, True, idx)
7927 self.LogWarning("Can't assemble newly created disk %d: %s",
7928 idx, result.fail_msg)
7929 errs.append(result.fail_msg)
7931 dev_path = result.payload
7932 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7933 target_node, dev_path,
7936 self.LogWarning("Can't copy data over for disk %d: %s",
7937 idx, result.fail_msg)
7938 errs.append(result.fail_msg)
7942 self.LogWarning("Some disks failed to copy, aborting")
7944 _RemoveDisks(self, instance, target_node=target_node)
7946 self.cfg.ReleaseDRBDMinors(instance.name)
7947 raise errors.OpExecError("Errors during disk copy: %s" %
7950 instance.primary_node = target_node
7951 self.cfg.Update(instance, feedback_fn)
7953 self.LogInfo("Removing the disks on the original node")
7954 _RemoveDisks(self, instance, target_node=source_node)
7956 # Only start the instance if it's marked as up
7957 if instance.admin_state == constants.ADMINST_UP:
7958 self.LogInfo("Starting instance %s on node %s",
7959 instance.name, target_node)
7961 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7962 ignore_secondaries=True)
7964 _ShutdownInstanceDisks(self, instance)
7965 raise errors.OpExecError("Can't activate the instance's disks")
7967 result = self.rpc.call_instance_start(target_node,
7968 (instance, None, None), False)
7969 msg = result.fail_msg
7971 _ShutdownInstanceDisks(self, instance)
7972 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7973 (instance.name, target_node, msg))
7976 class LUNodeMigrate(LogicalUnit):
7977 """Migrate all instances from a node.
7980 HPATH = "node-migrate"
7981 HTYPE = constants.HTYPE_NODE
7984 def CheckArguments(self):
7987 def ExpandNames(self):
7988 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7990 self.share_locks = _ShareAll()
7991 self.needed_locks = {
7992 locking.LEVEL_NODE: [self.op.node_name],
7995 def BuildHooksEnv(self):
7998 This runs on the master, the primary and all the secondaries.
8002 "NODE_NAME": self.op.node_name,
8003 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8006 def BuildHooksNodes(self):
8007 """Build hooks nodes.
8010 nl = [self.cfg.GetMasterNode()]
8013 def CheckPrereq(self):
8016 def Exec(self, feedback_fn):
8017 # Prepare jobs for migration instances
8018 allow_runtime_changes = self.op.allow_runtime_changes
8020 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8023 iallocator=self.op.iallocator,
8024 target_node=self.op.target_node,
8025 allow_runtime_changes=allow_runtime_changes,
8026 ignore_ipolicy=self.op.ignore_ipolicy)]
8027 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8030 # TODO: Run iallocator in this opcode and pass correct placement options to
8031 # OpInstanceMigrate. Since other jobs can modify the cluster between
8032 # running the iallocator and the actual migration, a good consistency model
8033 # will have to be found.
8035 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8036 frozenset([self.op.node_name]))
8038 return ResultWithJobs(jobs)
8041 class TLMigrateInstance(Tasklet):
8042 """Tasklet class for instance migration.
8045 @ivar live: whether the migration will be done live or non-live;
8046 this variable is initialized only after CheckPrereq has run
8047 @type cleanup: boolean
8048 @ivar cleanup: Whether we clean up from a failed migration
8049 @type iallocator: string
8050 @ivar iallocator: The iallocator used to determine target_node
8051 @type target_node: string
8052 @ivar target_node: If given, the target_node to reallocate the instance to
8053 @type failover: boolean
8054 @ivar failover: Whether operation results in failover or migration
8055 @type fallback: boolean
8056 @ivar fallback: Whether fallback to failover is allowed if migration not
8058 @type ignore_consistency: boolean
8059 @ivar ignore_consistency: Whether we should ignore consistency between source
8061 @type shutdown_timeout: int
8062 @ivar shutdown_timeout: In case of failover, the timeout to use for the shutdown
8063 @type ignore_ipolicy: bool
8064 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8069 _MIGRATION_POLL_INTERVAL = 1 # seconds
8070 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8072 def __init__(self, lu, instance_name, cleanup=False,
8073 failover=False, fallback=False,
8074 ignore_consistency=False,
8075 allow_runtime_changes=True,
8076 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8077 ignore_ipolicy=False):
8078 """Initializes this class.
8081 Tasklet.__init__(self, lu)
8084 self.instance_name = instance_name
8085 self.cleanup = cleanup
8086 self.live = False # will be overridden later
8087 self.failover = failover
8088 self.fallback = fallback
8089 self.ignore_consistency = ignore_consistency
8090 self.shutdown_timeout = shutdown_timeout
8091 self.ignore_ipolicy = ignore_ipolicy
8092 self.allow_runtime_changes = allow_runtime_changes
8094 def CheckPrereq(self):
8095 """Check prerequisites.
8097 This checks that the instance is in the cluster.
8100 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8101 instance = self.cfg.GetInstanceInfo(instance_name)
8102 assert instance is not None
8103 self.instance = instance
8104 cluster = self.cfg.GetClusterInfo()
8106 if (not self.cleanup and
8107 not instance.admin_state == constants.ADMINST_UP and
8108 not self.failover and self.fallback):
8109 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8110 " switching to failover")
8111 self.failover = True
8113 if instance.disk_template not in constants.DTS_MIRRORED:
8118 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8119 " %s" % (instance.disk_template, text),
8122 if instance.disk_template in constants.DTS_EXT_MIRROR:
8123 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8125 if self.lu.op.iallocator:
8126 self._RunAllocator()
8128 # We set self.target_node as it is required by
8130 self.target_node = self.lu.op.target_node
8132 # Check that the target node is correct in terms of instance policy
8133 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8134 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8135 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8136 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8137 ignore=self.ignore_ipolicy)
8139 # self.target_node is already populated, either directly or by the
8141 target_node = self.target_node
8142 if self.target_node == instance.primary_node:
8143 raise errors.OpPrereqError("Cannot migrate instance %s"
8144 " to its primary (%s)" %
8145 (instance.name, instance.primary_node), errors.ECODE_INVAL)
8147 if len(self.lu.tasklets) == 1:
8148 # It is safe to release locks only when we're the only tasklet
8150 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8151 keep=[instance.primary_node, self.target_node])
8154 secondary_nodes = instance.secondary_nodes
8155 if not secondary_nodes:
8156 raise errors.ConfigurationError("No secondary node but using"
8157 " %s disk template" %
8158 instance.disk_template)
8159 target_node = secondary_nodes[0]
8160 if self.lu.op.iallocator or (self.lu.op.target_node and
8161 self.lu.op.target_node != target_node):
8163 text = "failed over"
8166 raise errors.OpPrereqError("Instances with disk template %s cannot"
8167 " be %s to arbitrary nodes"
8168 " (neither an iallocator nor a target"
8169 " node can be passed)" %
8170 (instance.disk_template, text),
8172 nodeinfo = self.cfg.GetNodeInfo(target_node)
8173 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8174 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8175 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8176 ignore=self.ignore_ipolicy)
8178 i_be = cluster.FillBE(instance)
8180 # check memory requirements on the secondary node
8181 if (not self.cleanup and
8182 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8183 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8184 "migrating instance %s" %
8186 i_be[constants.BE_MINMEM],
8187 instance.hypervisor)
8189 self.lu.LogInfo("Not checking memory on the secondary node as"
8190 " instance will not be started")
8192 # check if failover must be forced instead of migration
8193 if (not self.cleanup and not self.failover and
8194 i_be[constants.BE_ALWAYS_FAILOVER]):
8196 self.lu.LogInfo("Instance configured to always failover; fallback"
8198 self.failover = True
8200 raise errors.OpPrereqError("This instance has been configured to"
8201 " always failover, please allow failover",
8204 # check bridge existence
8205 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8207 if not self.cleanup:
8208 _CheckNodeNotDrained(self.lu, target_node)
8209 if not self.failover:
8210 result = self.rpc.call_instance_migratable(instance.primary_node,
8212 if result.fail_msg and self.fallback:
8213 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8215 self.failover = True
8217 result.Raise("Can't migrate, please use failover",
8218 prereq=True, ecode=errors.ECODE_STATE)
8220 assert not (self.failover and self.cleanup)
8222 if not self.failover:
8223 if self.lu.op.live is not None and self.lu.op.mode is not None:
8224 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8225 " parameters are accepted",
8227 if self.lu.op.live is not None:
8229 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8231 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8232 # reset the 'live' parameter to None so that repeated
8233 # invocations of CheckPrereq do not raise an exception
8234 self.lu.op.live = None
8235 elif self.lu.op.mode is None:
8236 # read the default value from the hypervisor
8237 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8238 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8240 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8242 # Failover is never live
8245 if not (self.failover or self.cleanup):
8246 remote_info = self.rpc.call_instance_info(instance.primary_node,
8248 instance.hypervisor)
8249 remote_info.Raise("Error checking instance on node %s" %
8250 instance.primary_node)
8251 instance_running = bool(remote_info.payload)
8252 if instance_running:
8253 self.current_mem = int(remote_info.payload["memory"])
8255 def _RunAllocator(self):
8256 """Run the allocator based on input opcode.
8259 # FIXME: add a self.ignore_ipolicy option
8260 ial = IAllocator(self.cfg, self.rpc,
8261 mode=constants.IALLOCATOR_MODE_RELOC,
8262 name=self.instance_name,
8263 relocate_from=[self.instance.primary_node],
8266 ial.Run(self.lu.op.iallocator)
8269 raise errors.OpPrereqError("Can't compute nodes using"
8270 " iallocator '%s': %s" %
8271 (self.lu.op.iallocator, ial.info),
8273 if len(ial.result) != ial.required_nodes:
8274 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8275 " of nodes (%s), required %s" %
8276 (self.lu.op.iallocator, len(ial.result),
8277 ial.required_nodes), errors.ECODE_FAULT)
8278 self.target_node = ial.result[0]
8279 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8280 self.instance_name, self.lu.op.iallocator,
8281 utils.CommaJoin(ial.result))
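# Illustrative example (hypothetical node name): for a relocation request
# with required_nodes == 1 the allocator could return
# ial.result == ["node3.example.com"], making node3.example.com the
# target_node used by the rest of the tasklet.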
8283 def _WaitUntilSync(self):
8284 """Poll with custom rpc for disk sync.
8286 This uses our own step-based rpc call.
8289 self.feedback_fn("* wait until resync is done")
8293 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8295 (self.instance.disks,
8298 for node, nres in result.items():
8299 nres.Raise("Cannot resync disks on node %s" % node)
8300 node_done, node_percent = nres.payload
8301 all_done = all_done and node_done
8302 if node_percent is not None:
8303 min_percent = min(min_percent, node_percent)
8305 if min_percent < 100:
8306 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8309 def _EnsureSecondary(self, node):
8310 """Demote a node to secondary.
8313 self.feedback_fn("* switching node %s to secondary mode" % node)
8315 for dev in self.instance.disks:
8316 self.cfg.SetDiskID(dev, node)
8318 result = self.rpc.call_blockdev_close(node, self.instance.name,
8319 self.instance.disks)
8320 result.Raise("Cannot change disk to secondary on node %s" % node)
8322 def _GoStandalone(self):
8323 """Disconnect from the network.
8326 self.feedback_fn("* changing into standalone mode")
8327 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8328 self.instance.disks)
8329 for node, nres in result.items():
8330 nres.Raise("Cannot disconnect disks on node %s" % node)
8332 def _GoReconnect(self, multimaster):
8333 """Reconnect to the network.
8339 msg = "single-master"
8340 self.feedback_fn("* changing disks into %s mode" % msg)
8341 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8342 (self.instance.disks, self.instance),
8343 self.instance.name, multimaster)
8344 for node, nres in result.items():
8345 nres.Raise("Cannot change disks config on node %s" % node)
8347 def _ExecCleanup(self):
8348 """Try to cleanup after a failed migration.
8350 The cleanup is done by:
8351 - check that the instance is running only on one node
8352 (and update the config if needed)
8353 - change disks on its secondary node to secondary
8354 - wait until disks are fully synchronized
8355 - disconnect from the network
8356 - change disks into single-master mode
8357 - wait again until disks are fully synchronized
8360 instance = self.instance
8361 target_node = self.target_node
8362 source_node = self.source_node
8364 # check running on only one node
8365 self.feedback_fn("* checking where the instance actually runs"
8366 " (if this hangs, the hypervisor might be in"
8368 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8369 for node, result in ins_l.items():
8370 result.Raise("Can't contact node %s" % node)
8372 runningon_source = instance.name in ins_l[source_node].payload
8373 runningon_target = instance.name in ins_l[target_node].payload
8375 if runningon_source and runningon_target:
8376 raise errors.OpExecError("Instance seems to be running on two nodes,"
8377 " or the hypervisor is confused; you will have"
8378 " to ensure manually that it runs only on one"
8379 " and restart this operation")
8381 if not (runningon_source or runningon_target):
8382 raise errors.OpExecError("Instance does not seem to be running at all;"
8383 " in this case it's safer to repair by"
8384 " running 'gnt-instance stop' to ensure disk"
8385 " shutdown, and then restarting it")
8387 if runningon_target:
8388 # the migration has actually succeeded, we need to update the config
8389 self.feedback_fn("* instance running on secondary node (%s),"
8390 " updating config" % target_node)
8391 instance.primary_node = target_node
8392 self.cfg.Update(instance, self.feedback_fn)
8393 demoted_node = source_node
8395 self.feedback_fn("* instance confirmed to be running on its"
8396 " primary node (%s)" % source_node)
8397 demoted_node = target_node
8399 if instance.disk_template in constants.DTS_INT_MIRROR:
8400 self._EnsureSecondary(demoted_node)
8402 self._WaitUntilSync()
8403 except errors.OpExecError:
8404 # we ignore errors here, since if the device is standalone, it
8405 # won't be able to sync
8407 self._GoStandalone()
8408 self._GoReconnect(False)
8409 self._WaitUntilSync()
8411 self.feedback_fn("* done")
8413 def _RevertDiskStatus(self):
8414 """Try to revert the disk status after a failed migration.
8417 target_node = self.target_node
8418 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8422 self._EnsureSecondary(target_node)
8423 self._GoStandalone()
8424 self._GoReconnect(False)
8425 self._WaitUntilSync()
8426 except errors.OpExecError, err:
8427 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8428 " please try to recover the instance manually;"
8429 " error '%s'" % str(err))
8431 def _AbortMigration(self):
8432 """Call the hypervisor code to abort a started migration.
8435 instance = self.instance
8436 target_node = self.target_node
8437 source_node = self.source_node
8438 migration_info = self.migration_info
8440 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8444 abort_msg = abort_result.fail_msg
8446 logging.error("Aborting migration failed on target node %s: %s",
8447 target_node, abort_msg)
8448 # Don't raise an exception here, as we still have to try to revert the
8449 # disk status, even if this step failed.
8451 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8452 instance, False, self.live)
8453 abort_msg = abort_result.fail_msg
8455 logging.error("Aborting migration failed on source node %s: %s",
8456 source_node, abort_msg)
8458 def _ExecMigration(self):
8459 """Migrate an instance.
8461 The migrate is done by:
8462 - change the disks into dual-master mode
8463 - wait until disks are fully synchronized again
8464 - migrate the instance
8465 - change disks on the new secondary node (the old primary) to secondary
8466 - wait until disks are fully synchronized
8467 - change disks into single-master mode
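These steps map onto the calls below: for internally mirrored (DRBD)
templates, _EnsureSecondary, _GoStandalone, _GoReconnect(True) and
_WaitUntilSync switch the disks to dual-master mode; call_instance_migrate
then performs the migration itself, polled via
call_instance_get_migration_status; finally _EnsureSecondary on the old
primary, _WaitUntilSync, _GoStandalone, _GoReconnect(False) and a last
_WaitUntilSync return the disks to single-master mode. Externally mirrored
templates skip the disk reconfiguration.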
8470 instance = self.instance
8471 target_node = self.target_node
8472 source_node = self.source_node
8474 # Check for hypervisor version mismatch and warn the user.
8475 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8476 None, [self.instance.hypervisor])
8477 for ninfo in nodeinfo.values():
8478 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8480 (_, _, (src_info, )) = nodeinfo[source_node].payload
8481 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8483 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8484 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8485 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8486 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8487 if src_version != dst_version:
8488 self.feedback_fn("* warning: hypervisor version mismatch between"
8489 " source (%s) and target (%s) node" %
8490 (src_version, dst_version))
8492 self.feedback_fn("* checking disk consistency between source and target")
8493 for (idx, dev) in enumerate(instance.disks):
8494 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8495 raise errors.OpExecError("Disk %s is degraded or not fully"
8496 " synchronized on target node,"
8497 " aborting migration" % idx)
8499 if self.current_mem > self.tgt_free_mem:
8500 if not self.allow_runtime_changes:
8501 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8502 " free memory to fit instance %s on target"
8503 " node %s (have %dMB, need %dMB)" %
8504 (instance.name, target_node,
8505 self.tgt_free_mem, self.current_mem))
8506 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8507 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8510 rpcres.Raise("Cannot modify instance runtime memory")
8512 # First get the migration information from the remote node
8513 result = self.rpc.call_migration_info(source_node, instance)
8514 msg = result.fail_msg
8516 log_err = ("Failed fetching source migration information from %s: %s" %
8518 logging.error(log_err)
8519 raise errors.OpExecError(log_err)
8521 self.migration_info = migration_info = result.payload
8523 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8524 # Then switch the disks to master/master mode
8525 self._EnsureSecondary(target_node)
8526 self._GoStandalone()
8527 self._GoReconnect(True)
8528 self._WaitUntilSync()
8530 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8531 result = self.rpc.call_accept_instance(target_node,
8534 self.nodes_ip[target_node])
8536 msg = result.fail_msg
8538 logging.error("Instance pre-migration failed, trying to revert"
8539 " disk status: %s", msg)
8540 self.feedback_fn("Pre-migration failed, aborting")
8541 self._AbortMigration()
8542 self._RevertDiskStatus()
8543 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8544 (instance.name, msg))
8546 self.feedback_fn("* migrating instance to %s" % target_node)
8547 result = self.rpc.call_instance_migrate(source_node, instance,
8548 self.nodes_ip[target_node],
8550 msg = result.fail_msg
8552 logging.error("Instance migration failed, trying to revert"
8553 " disk status: %s", msg)
8554 self.feedback_fn("Migration failed, aborting")
8555 self._AbortMigration()
8556 self._RevertDiskStatus()
8557 raise errors.OpExecError("Could not migrate instance %s: %s" %
8558 (instance.name, msg))
8560 self.feedback_fn("* starting memory transfer")
8561 last_feedback = time.time()
8563 result = self.rpc.call_instance_get_migration_status(source_node,
8565 msg = result.fail_msg
8566 ms = result.payload # MigrationStatus instance
8567 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8568 logging.error("Instance migration failed, trying to revert"
8569 " disk status: %s", msg)
8570 self.feedback_fn("Migration failed, aborting")
8571 self._AbortMigration()
8572 self._RevertDiskStatus()
8573 raise errors.OpExecError("Could not migrate instance %s: %s" %
8574 (instance.name, msg))
8576 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8577 self.feedback_fn("* memory transfer complete")
8580 if (utils.TimeoutExpired(last_feedback,
8581 self._MIGRATION_FEEDBACK_INTERVAL) and
8582 ms.transferred_ram is not None):
8583 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8584 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8585 last_feedback = time.time()
8587 time.sleep(self._MIGRATION_POLL_INTERVAL)
8589 result = self.rpc.call_instance_finalize_migration_src(source_node,
8593 msg = result.fail_msg
8595 logging.error("Instance migration succeeded, but finalization failed"
8596 " on the source node: %s", msg)
8597 raise errors.OpExecError("Could not finalize instance migration: %s" %
8600 instance.primary_node = target_node
8602 # distribute new instance config to the other nodes
8603 self.cfg.Update(instance, self.feedback_fn)
8605 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8609 msg = result.fail_msg
8611 logging.error("Instance migration succeeded, but finalization failed"
8612 " on the target node: %s", msg)
8613 raise errors.OpExecError("Could not finalize instance migration: %s" %
8616 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8617 self._EnsureSecondary(source_node)
8618 self._WaitUntilSync()
8619 self._GoStandalone()
8620 self._GoReconnect(False)
8621 self._WaitUntilSync()
8623 # If the instance's disk template is `rbd' and there was a successful
8624 # migration, unmap the device from the source node.
8625 if self.instance.disk_template == constants.DT_RBD:
8626 disks = _ExpandCheckDisks(instance, instance.disks)
8627 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8629 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8630 msg = result.fail_msg
8632 logging.error("Migration was successful, but couldn't unmap the"
8633 " block device %s on source node %s: %s",
8634 disk.iv_name, source_node, msg)
8635 logging.error("You need to unmap the device %s manually on %s",
8636 disk.iv_name, source_node)
8638 self.feedback_fn("* done")
8640 def _ExecFailover(self):
8641 """Failover an instance.
8643 The failover is done by shutting it down on its present node and
8644 starting it on the secondary.
8647 instance = self.instance
8648 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8650 source_node = instance.primary_node
8651 target_node = self.target_node
8653 if instance.admin_state == constants.ADMINST_UP:
8654 self.feedback_fn("* checking disk consistency between source and target")
8655 for (idx, dev) in enumerate(instance.disks):
8656 # for drbd, these are drbd over lvm
8657 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8659 if primary_node.offline:
8660 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8662 (primary_node.name, idx, target_node))
8663 elif not self.ignore_consistency:
8664 raise errors.OpExecError("Disk %s is degraded on target node,"
8665 " aborting failover" % idx)
8667 self.feedback_fn("* not checking disk consistency as instance is not"
8670 self.feedback_fn("* shutting down instance on source node")
8671 logging.info("Shutting down instance %s on node %s",
8672 instance.name, source_node)
8674 result = self.rpc.call_instance_shutdown(source_node, instance,
8675 self.shutdown_timeout)
8676 msg = result.fail_msg
8678 if self.ignore_consistency or primary_node.offline:
8679 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8680 " proceeding anyway; please make sure node"
8681 " %s is down; error details: %s",
8682 instance.name, source_node, source_node, msg)
8684 raise errors.OpExecError("Could not shutdown instance %s on"
8686 (instance.name, source_node, msg))
8688 self.feedback_fn("* deactivating the instance's disks on source node")
8689 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8690 raise errors.OpExecError("Can't shut down the instance's disks")
8692 instance.primary_node = target_node
8693 # distribute new instance config to the other nodes
8694 self.cfg.Update(instance, self.feedback_fn)
8696 # Only start the instance if it's marked as up
8697 if instance.admin_state == constants.ADMINST_UP:
8698 self.feedback_fn("* activating the instance's disks on target node %s" %
8700 logging.info("Starting instance %s on node %s",
8701 instance.name, target_node)
8703 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8704 ignore_secondaries=True)
8706 _ShutdownInstanceDisks(self.lu, instance)
8707 raise errors.OpExecError("Can't activate the instance's disks")
8709 self.feedback_fn("* starting the instance on the target node %s" %
8711 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8713 msg = result.fail_msg
8715 _ShutdownInstanceDisks(self.lu, instance)
8716 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8717 (instance.name, target_node, msg))
8719 def Exec(self, feedback_fn):
8720 """Perform the migration.
8723 self.feedback_fn = feedback_fn
8724 self.source_node = self.instance.primary_node
8726 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8727 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8728 self.target_node = self.instance.secondary_nodes[0]
8729 # Otherwise self.target_node has been populated either
8730 # directly, or through an iallocator.
8732 self.all_nodes = [self.source_node, self.target_node]
8733 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8734 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8737 feedback_fn("Failover instance %s" % self.instance.name)
8738 self._ExecFailover()
8740 feedback_fn("Migrating instance %s" % self.instance.name)
8743 return self._ExecCleanup()
8745 return self._ExecMigration()
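# A minimal sketch (not part of the original module; the function name is
# hypothetical) of the dispatch performed by TLMigrateInstance.Exec above:
def _ExampleMigrateDispatch(failover, cleanup):
  """Return the internal method Exec would use for the given flags."""
  # failover and cleanup are never both set (asserted in CheckPrereq)
  if failover:
    return "_ExecFailover"    # shut down on the primary, start on the secondary
  elif cleanup:
    return "_ExecCleanup"     # repair config/disks after a failed migration
  else:
    return "_ExecMigration"   # live or non-live migration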
8748 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8750 """Wrapper around L{_CreateBlockDevInner}.
8752 This method annotates the root device first.
8755 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8756 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8760 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8762 """Create a tree of block devices on a given node.
8764 If this device type has to be created on secondaries, create it and
8767 If not, just recurse to children keeping the same 'force' value.
8769 @attention: The device has to be annotated already.
8771 @param lu: the lu on whose behalf we execute
8772 @param node: the node on which to create the device
8773 @type instance: L{objects.Instance}
8774 @param instance: the instance which owns the device
8775 @type device: L{objects.Disk}
8776 @param device: the device to create
8777 @type force_create: boolean
8778 @param force_create: whether to force creation of this device; this
8779 will be changed to True whenever we find a device which has the
8780 CreateOnSecondary() attribute
8781 @param info: the extra 'metadata' we should attach to the device
8782 (this will be represented as a LVM tag)
8783 @type force_open: boolean
8784 @param force_open: this parameter will be passed to the
8785 L{backend.BlockdevCreate} function where it specifies
8786 whether we run on primary or not, and it affects both
8787 the child assembly and the device's own Open() execution
8790 if device.CreateOnSecondary():
8794 for child in device.children:
8795 _CreateBlockDevInner(lu, node, instance, child, force_create,
8798 if not force_create:
8801 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8804 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8805 """Create a single block device on a given node.
8807 This will not recurse over children of the device, so they must be
8810 @param lu: the lu on whose behalf we execute
8811 @param node: the node on which to create the device
8812 @type instance: L{objects.Instance}
8813 @param instance: the instance which owns the device
8814 @type device: L{objects.Disk}
8815 @param device: the device to create
8816 @param info: the extra 'metadata' we should attach to the device
8817 (this will be represented as a LVM tag)
8818 @type force_open: boolean
8819 @param force_open: this parameter will be passed to the
8820 L{backend.BlockdevCreate} function where it specifies
8821 whether we run on primary or not, and it affects both
8822 the child assembly and the device's own Open() execution
8825 lu.cfg.SetDiskID(device, node)
8826 result = lu.rpc.call_blockdev_create(node, device, device.size,
8827 instance.name, force_open, info)
8828 result.Raise("Can't create block device %s on"
8829 " node %s for instance %s" % (device, node, instance.name))
8830 if device.physical_id is None:
8831 device.physical_id = result.payload
8834 def _GenerateUniqueNames(lu, exts):
8835 """Generate a suitable LV name.
8837 This will generate a logical volume name for the given instance.
8842 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8843 results.append("%s%s" % (new_id, val))
8847 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8848 iv_name, p_minor, s_minor):
8849 """Generate a drbd8 device complete with its children.
8852 assert len(vgnames) == len(names) == 2
8853 port = lu.cfg.AllocatePort()
8854 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8856 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8857 logical_id=(vgnames[0], names[0]),
8859 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8860 logical_id=(vgnames[1], names[1]),
8862 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8863 logical_id=(primary, secondary, port,
8866 children=[dev_data, dev_meta],
8867 iv_name=iv_name, params={})
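# Illustrative sketch of the device tree built above (placeholder names in
# angle brackets; the DRBD logical_id also carries the minors and the shared
# secret allocated above):
#
#   Disk(LD_DRBD8, size=size, logical_id=(primary, secondary, port, ...))
#     +-- Disk(LD_LV, size=size,           logical_id=(<data_vg>, <name>_data))
#     +-- Disk(LD_LV, size=DRBD_META_SIZE, logical_id=(<meta_vg>, <name>_meta))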
8871 _DISK_TEMPLATE_NAME_PREFIX = {
8872 constants.DT_PLAIN: "",
8873 constants.DT_RBD: ".rbd",
8877 _DISK_TEMPLATE_DEVICE_TYPE = {
8878 constants.DT_PLAIN: constants.LD_LV,
8879 constants.DT_FILE: constants.LD_FILE,
8880 constants.DT_SHARED_FILE: constants.LD_FILE,
8881 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8882 constants.DT_RBD: constants.LD_RBD,
8886 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8887 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8888 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8889 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8890 """Generate the entire disk layout for a given template type.
8893 # TODO: compute space requirements
8895 vgname = lu.cfg.GetVGName()
8896 disk_count = len(disk_info)
8899 if template_name == constants.DT_DISKLESS:
8901 elif template_name == constants.DT_DRBD8:
8902 if len(secondary_nodes) != 1:
8903 raise errors.ProgrammerError("Wrong template configuration")
8904 remote_node = secondary_nodes[0]
8905 minors = lu.cfg.AllocateDRBDMinor(
8906 [primary_node, remote_node] * len(disk_info), instance_name)
8908 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8910 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8913 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8914 for i in range(disk_count)]):
8915 names.append(lv_prefix + "_data")
8916 names.append(lv_prefix + "_meta")
8917 for idx, disk in enumerate(disk_info):
8918 disk_index = idx + base_index
8919 data_vg = disk.get(constants.IDISK_VG, vgname)
8920 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8921 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8922 disk[constants.IDISK_SIZE],
8924 names[idx * 2:idx * 2 + 2],
8925 "disk/%d" % disk_index,
8926 minors[idx * 2], minors[idx * 2 + 1])
8927 disk_dev.mode = disk[constants.IDISK_MODE]
8928 disks.append(disk_dev)
8931 raise errors.ProgrammerError("Wrong template configuration")
8933 if template_name == constants.DT_FILE:
8935 elif template_name == constants.DT_SHARED_FILE:
8936 _req_shr_file_storage()
8938 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8939 if name_prefix is None:
8942 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8943 (name_prefix, base_index + i)
8944 for i in range(disk_count)])
8946 if template_name == constants.DT_PLAIN:
8947 def logical_id_fn(idx, _, disk):
8948 vg = disk.get(constants.IDISK_VG, vgname)
8949 return (vg, names[idx])
8950 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8952 lambda _, disk_index, disk: (file_driver,
8953 "%s/disk%d" % (file_storage_dir,
8955 elif template_name == constants.DT_BLOCK:
8957 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8958 disk[constants.IDISK_ADOPT])
8959 elif template_name == constants.DT_RBD:
8960 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8962 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8964 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
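# For reference, the logical_id shapes produced by the functions above
# (values illustrative): plain -> (vg, name); file/sharedfile ->
# (file_driver, "<file_storage_dir>/disk<N>"); blockdev ->
# (BLOCKDEV_DRIVER_MANUAL, <adopted device path>); rbd -> ("rbd", name).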
8966 for idx, disk in enumerate(disk_info):
8967 disk_index = idx + base_index
8968 size = disk[constants.IDISK_SIZE]
8969 feedback_fn("* disk %s, size %s" %
8970 (disk_index, utils.FormatUnit(size, "h")))
8971 disks.append(objects.Disk(dev_type=dev_type, size=size,
8972 logical_id=logical_id_fn(idx, disk_index, disk),
8973 iv_name="disk/%d" % disk_index,
8974 mode=disk[constants.IDISK_MODE],
8980 def _GetInstanceInfoText(instance):
8981 Compute the text that should be added to the disk's metadata.
8984 return "originstname+%s" % instance.name
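# Example (hypothetical name): for an instance "inst1.example.com" this
# returns "originstname+inst1.example.com", which is attached to the
# instance's block devices as their info text (e.g. as an LVM tag).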
8987 def _CalcEta(time_taken, written, total_size):
8988 """Calculates the ETA based on size written and total size.
8990 @param time_taken: The time taken so far
8991 @param written: amount written so far
8992 @param total_size: The total size of data to be written
8993 @return: The remaining time in seconds
8996 avg_time = time_taken / float(written)
8997 return (total_size - written) * avg_time
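# Worked example of the formula above (illustrative numbers): if 1024 MB of
# a 4096 MB disk were written in 60 seconds, avg_time is 60 / 1024 ~= 0.0586
# s/MB, so the remaining 3072 MB are expected to take about
# 3072 * 0.0586 ~= 180 seconds.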
9000 def _WipeDisks(lu, instance):
9001 """Wipes instance disks.
9003 @type lu: L{LogicalUnit}
9004 @param lu: the logical unit on whose behalf we execute
9005 @type instance: L{objects.Instance}
9006 @param instance: the instance whose disks we should wipe
9007 @return: the success of the wipe
9010 node = instance.primary_node
9012 for device in instance.disks:
9013 lu.cfg.SetDiskID(device, node)
9015 logging.info("Pause sync of instance %s disks", instance.name)
9016 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9017 (instance.disks, instance),
9020 for idx, success in enumerate(result.payload):
9022 logging.warn("pause-sync of instance %s for disk %d failed",
9026 for idx, device in enumerate(instance.disks):
9027 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but
9028 # at most MAX_WIPE_CHUNK
9029 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
9030 constants.MIN_WIPE_CHUNK_PERCENT)
9031 # we _must_ make this an int, otherwise rounding errors will
9033 wipe_chunk_size = int(wipe_chunk_size)
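# Worked example (the real constant values live in constants.py and are only
# assumed here): with MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024,
# a 2048 MB disk is wiped in chunks of min(1024, 2048 * 0.10) = 204.8,
# truncated to 204 MB, while very large disks are capped at 1024 MB per chunk.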
9035 lu.LogInfo("* Wiping disk %d", idx)
9036 logging.info("Wiping disk %d for instance %s, node %s using"
9037 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9042 start_time = time.time()
9044 while offset < size:
9045 wipe_size = min(wipe_chunk_size, size - offset)
9046 logging.debug("Wiping disk %d, offset %s, chunk %s",
9047 idx, offset, wipe_size)
9048 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9050 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9051 (idx, offset, wipe_size))
9054 if now - last_output >= 60:
9055 eta = _CalcEta(now - start_time, offset, size)
9056 lu.LogInfo(" - done: %.1f%% ETA: %s" %
9057 (offset / float(size) * 100, utils.FormatSeconds(eta)))
9060 logging.info("Resume sync of instance %s disks", instance.name)
9062 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9063 (instance.disks, instance),
9066 for idx, success in enumerate(result.payload):
9068 lu.LogWarning("Resume sync of disk %d failed, please have a"
9069 " look at the status and troubleshoot the issue", idx)
9070 logging.warn("resume-sync of instance %s for disk %d failed",
9074 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9075 """Create all disks for an instance.
9077 This abstracts away some work from AddInstance.
9079 @type lu: L{LogicalUnit}
9080 @param lu: the logical unit on whose behalf we execute
9081 @type instance: L{objects.Instance}
9082 @param instance: the instance whose disks we should create
9084 @param to_skip: list of indices to skip
9085 @type target_node: string
9086 @param target_node: if passed, overrides the target node for creation
9088 @return: the success of the creation
9091 info = _GetInstanceInfoText(instance)
9092 if target_node is None:
9093 pnode = instance.primary_node
9094 all_nodes = instance.all_nodes
9099 if instance.disk_template in constants.DTS_FILEBASED:
9100 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9101 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9103 result.Raise("Failed to create directory '%s' on"
9104 " node %s" % (file_storage_dir, pnode))
9106 # Note: this needs to be kept in sync with adding of disks in
9107 # LUInstanceSetParams
9108 for idx, device in enumerate(instance.disks):
9109 if to_skip and idx in to_skip:
9111 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9113 for node in all_nodes:
9114 f_create = node == pnode
9115 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9118 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9119 """Remove all disks for an instance.
9121 This abstracts away some work from `AddInstance()` and
9122 `RemoveInstance()`. Note that in case some of the devices couldn't
9123 be removed, the removal will continue with the other ones (compare
9124 with `_CreateDisks()`).
9126 @type lu: L{LogicalUnit}
9127 @param lu: the logical unit on whose behalf we execute
9128 @type instance: L{objects.Instance}
9129 @param instance: the instance whose disks we should remove
9130 @type target_node: string
9131 @param target_node: used to override the node on which to remove the disks
9133 @return: the success of the removal
9136 logging.info("Removing block devices for instance %s", instance.name)
9139 ports_to_release = set()
9140 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9141 for (idx, device) in enumerate(anno_disks):
9143 edata = [(target_node, device)]
9145 edata = device.ComputeNodeTree(instance.primary_node)
9146 for node, disk in edata:
9147 lu.cfg.SetDiskID(disk, node)
9148 result = lu.rpc.call_blockdev_remove(node, disk)
9150 lu.LogWarning("Could not remove disk %s on node %s,"
9151 " continuing anyway: %s", idx, node, result.fail_msg)
9152 if not (result.offline and node != instance.primary_node):
9155 # if this is a DRBD disk, return its port to the pool
9156 if device.dev_type in constants.LDS_DRBD:
9157 ports_to_release.add(device.logical_id[2])
9159 if all_result or ignore_failures:
9160 for port in ports_to_release:
9161 lu.cfg.AddTcpUdpPort(port)
9163 if instance.disk_template == constants.DT_FILE:
9164 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9168 tgt = instance.primary_node
9169 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9171 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9172 file_storage_dir, instance.primary_node, result.fail_msg)
9178 def _ComputeDiskSizePerVG(disk_template, disks):
9179 """Compute disk size requirements in the volume group
9182 def _compute(disks, payload):
9183 """Universal algorithm.
9188 vgs[disk[constants.IDISK_VG]] = \
9189 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9193 # Required free disk space as a function of disk and swap space
9195 constants.DT_DISKLESS: {},
9196 constants.DT_PLAIN: _compute(disks, 0),
9197 # 128 MB are added for drbd metadata for each disk
9198 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9199 constants.DT_FILE: {},
9200 constants.DT_SHARED_FILE: {},
9203 if disk_template not in req_size_dict:
9204 raise errors.ProgrammerError("Disk template '%s' size requirement"
9205 " is unknown" % disk_template)
9207 return req_size_dict[disk_template]
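# Illustrative example (hypothetical VG names, using the 128 MB DRBD metadata
# size from the comment above): for a drbd instance with a 1024 MB disk in
# "xenvg" and a 2048 MB disk in "fastvg", this returns
# {"xenvg": 1024 + 128, "fastvg": 2048 + 128} == {"xenvg": 1152, "fastvg": 2176}.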
9210 def _ComputeDiskSize(disk_template, disks):
9211 """Compute disk size requirements in the volume group
9214 # Required free disk space as a function of disk and swap space
9216 constants.DT_DISKLESS: None,
9217 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9218 # 128 MB are added for drbd metadata for each disk
9220 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9221 constants.DT_FILE: None,
9222 constants.DT_SHARED_FILE: 0,
9223 constants.DT_BLOCK: 0,
9224 constants.DT_RBD: 0,
9227 if disk_template not in req_size_dict:
9228 raise errors.ProgrammerError("Disk template '%s' size requirement"
9229 " is unknown" % disk_template)
9231 return req_size_dict[disk_template]
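# Illustrative totals for two disks of 1024 MB and 2048 MB: plain needs
# 1024 + 2048 = 3072 MB, drbd needs (1024 + 128) + (2048 + 128) = 3328 MB,
# while diskless and file-based templates report no size requirement here.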
9234 def _FilterVmNodes(lu, nodenames):
9235 """Filters out non-vm_capable nodes from a list.
9237 @type lu: L{LogicalUnit}
9238 @param lu: the logical unit for which we check
9239 @type nodenames: list
9240 @param nodenames: the list of nodes on which we should check
9242 @return: the list of vm-capable nodes
9245 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9246 return [name for name in nodenames if name not in non_vm_nodes]
9249 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9250 """Hypervisor parameter validation.
9252 This function abstracts the hypervisor parameter validation to be
9253 used in both instance create and instance modify.
9255 @type lu: L{LogicalUnit}
9256 @param lu: the logical unit for which we check
9257 @type nodenames: list
9258 @param nodenames: the list of nodes on which we should check
9259 @type hvname: string
9260 @param hvname: the name of the hypervisor we should use
9261 @type hvparams: dict
9262 @param hvparams: the parameters which we need to check
9263 @raise errors.OpPrereqError: if the parameters are not valid
9266 nodenames = _FilterVmNodes(lu, nodenames)
9268 cluster = lu.cfg.GetClusterInfo()
9269 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9271 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9272 for node in nodenames:
9276 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9279 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9280 """OS parameters validation.
9282 @type lu: L{LogicalUnit}
9283 @param lu: the logical unit for which we check
9284 @type required: boolean
9285 @param required: whether the validation should fail if the OS is not found
9287 @type nodenames: list
9288 @param nodenames: the list of nodes on which we should check
9289 @type osname: string
9290 @param osname: the name of the OS we should use
9291 @type osparams: dict
9292 @param osparams: the parameters which we need to check
9293 @raise errors.OpPrereqError: if the parameters are not valid
9296 nodenames = _FilterVmNodes(lu, nodenames)
9297 result = lu.rpc.call_os_validate(nodenames, required, osname,
9298 [constants.OS_VALIDATE_PARAMETERS],
9300 for node, nres in result.items():
9301 # we don't check for offline cases since this should be run only
9302 # against the master node and/or an instance's nodes
9303 nres.Raise("OS Parameters validation failed on node %s" % node)
9304 if not nres.payload:
9305 lu.LogInfo("OS %s not found on node %s, validation skipped",
9309 class LUInstanceCreate(LogicalUnit):
9310 """Create an instance.
9313 HPATH = "instance-add"
9314 HTYPE = constants.HTYPE_INSTANCE
9317 def CheckArguments(self):
9321 # do not require name_check to ease forward/backward compatibility
9323 if self.op.no_install and self.op.start:
9324 self.LogInfo("No-installation mode selected, disabling startup")
9325 self.op.start = False
9326 # validate/normalize the instance name
9327 self.op.instance_name = \
9328 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9330 if self.op.ip_check and not self.op.name_check:
9331 # TODO: make the ip check more flexible and not depend on the name check
9332 raise errors.OpPrereqError("Cannot do IP address check without a name"
9333 " check", errors.ECODE_INVAL)
9335 # check nics' parameter names
9336 for nic in self.op.nics:
9337 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9339 # check disks. parameter names and consistent adopt/no-adopt strategy
9340 has_adopt = has_no_adopt = False
9341 for disk in self.op.disks:
9342 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9343 if constants.IDISK_ADOPT in disk:
9347 if has_adopt and has_no_adopt:
9348 raise errors.OpPrereqError("Either all disks are adopted or none is",
9351 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9352 raise errors.OpPrereqError("Disk adoption is not supported for the"
9353 " '%s' disk template" %
9354 self.op.disk_template,
9356 if self.op.iallocator is not None:
9357 raise errors.OpPrereqError("Disk adoption not allowed with an"
9358 " iallocator script", errors.ECODE_INVAL)
9359 if self.op.mode == constants.INSTANCE_IMPORT:
9360 raise errors.OpPrereqError("Disk adoption not allowed for"
9361 " instance import", errors.ECODE_INVAL)
9363 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9364 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9365 " but no 'adopt' parameter given" %
9366 self.op.disk_template,
9369 self.adopt_disks = has_adopt
9371 # instance name verification
9372 if self.op.name_check:
9373 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9374 self.op.instance_name = self.hostname1.name
9375 # used in CheckPrereq for ip ping check
9376 self.check_ip = self.hostname1.ip
9378 self.check_ip = None
9380 # file storage checks
9381 if (self.op.file_driver and
9382 not self.op.file_driver in constants.FILE_DRIVER):
9383 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9384 self.op.file_driver, errors.ECODE_INVAL)
9386 if self.op.disk_template == constants.DT_FILE:
9387 opcodes.RequireFileStorage()
9388 elif self.op.disk_template == constants.DT_SHARED_FILE:
9389 opcodes.RequireSharedFileStorage()
9391 ### Node/iallocator related checks
9392 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9394 if self.op.pnode is not None:
9395 if self.op.disk_template in constants.DTS_INT_MIRROR:
9396 if self.op.snode is None:
9397 raise errors.OpPrereqError("The networked disk templates need"
9398 " a mirror node", errors.ECODE_INVAL)
9400 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9402 self.op.snode = None
9404 self._cds = _GetClusterDomainSecret()
9406 if self.op.mode == constants.INSTANCE_IMPORT:
9407 # On import force_variant must be True, because if we forced it at
9408 # initial install, our only chance when importing it back is that it works.
9410 self.op.force_variant = True
9412 if self.op.no_install:
9413 self.LogInfo("No-installation mode has no effect during import")
9415 elif self.op.mode == constants.INSTANCE_CREATE:
9416 if self.op.os_type is None:
9417 raise errors.OpPrereqError("No guest OS specified",
9419 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9420 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9421 " installation" % self.op.os_type,
9423 if self.op.disk_template is None:
9424 raise errors.OpPrereqError("No disk template specified",
9427 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9428 # Check handshake to ensure both clusters have the same domain secret
9429 src_handshake = self.op.source_handshake
9430 if not src_handshake:
9431 raise errors.OpPrereqError("Missing source handshake",
9434 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9437 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9440 # Load and check source CA
9441 self.source_x509_ca_pem = self.op.source_x509_ca
9442 if not self.source_x509_ca_pem:
9443 raise errors.OpPrereqError("Missing source X509 CA",
9447 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9449 except OpenSSL.crypto.Error, err:
9450 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9451 (err, ), errors.ECODE_INVAL)
9453 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9454 if errcode is not None:
9455 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9458 self.source_x509_ca = cert
9460 src_instance_name = self.op.source_instance_name
9461 if not src_instance_name:
9462 raise errors.OpPrereqError("Missing source instance name",
9465 self.source_instance_name = \
9466 netutils.GetHostname(name=src_instance_name).name
9469 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9470 self.op.mode, errors.ECODE_INVAL)
9472 def ExpandNames(self):
9473 """ExpandNames for CreateInstance.
9475 Figure out the right locks for instance creation.
9478 self.needed_locks = {}
9480 instance_name = self.op.instance_name
9481 # this is just a preventive check, but someone might still add this
9482 # instance in the meantime, and creation will fail at lock-add time
9483 if instance_name in self.cfg.GetInstanceList():
9484 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9485 instance_name, errors.ECODE_EXISTS)
9487 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9489 if self.op.iallocator:
9490 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9491 # specifying a group on instance creation and then selecting nodes from that group
9493 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9494 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9496 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9497 nodelist = [self.op.pnode]
9498 if self.op.snode is not None:
9499 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9500 nodelist.append(self.op.snode)
9501 self.needed_locks[locking.LEVEL_NODE] = nodelist
9502 # Lock resources of instance's primary and secondary nodes (copy to
9503 # prevent accidental modification)
9504 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9506 # in case of import lock the source node too
9507 if self.op.mode == constants.INSTANCE_IMPORT:
9508 src_node = self.op.src_node
9509 src_path = self.op.src_path
9511 if src_path is None:
9512 self.op.src_path = src_path = self.op.instance_name
9514 if src_node is None:
9515 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9516 self.op.src_node = None
9517 if os.path.isabs(src_path):
9518 raise errors.OpPrereqError("Importing an instance from a path"
9519 " requires a source node option",
9522 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9523 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9524 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9525 if not os.path.isabs(src_path):
9526 self.op.src_path = src_path = \
9527 utils.PathJoin(constants.EXPORT_DIR, src_path)
9529 def _RunAllocator(self):
9530 """Run the allocator based on input opcode.
9533 nics = [n.ToDict() for n in self.nics]
9534 ial = IAllocator(self.cfg, self.rpc,
9535 mode=constants.IALLOCATOR_MODE_ALLOC,
9536 name=self.op.instance_name,
9537 disk_template=self.op.disk_template,
9540 vcpus=self.be_full[constants.BE_VCPUS],
9541 memory=self.be_full[constants.BE_MAXMEM],
9542 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9545 hypervisor=self.op.hypervisor,
9548 ial.Run(self.op.iallocator)
9551 raise errors.OpPrereqError("Can't compute nodes using"
9552 " iallocator '%s': %s" %
9553 (self.op.iallocator, ial.info),
9555 if len(ial.result) != ial.required_nodes:
9556 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9557 " of nodes (%s), required %s" %
9558 (self.op.iallocator, len(ial.result),
9559 ial.required_nodes), errors.ECODE_FAULT)
9560 self.op.pnode = ial.result[0]
9561 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9562 self.op.instance_name, self.op.iallocator,
9563 utils.CommaJoin(ial.result))
9564 if ial.required_nodes == 2:
9565 self.op.snode = ial.result[1]
9567 def BuildHooksEnv(self):
9570 This runs on master, primary and secondary nodes of the instance.
9574 "ADD_MODE": self.op.mode,
9576 if self.op.mode == constants.INSTANCE_IMPORT:
9577 env["SRC_NODE"] = self.op.src_node
9578 env["SRC_PATH"] = self.op.src_path
9579 env["SRC_IMAGES"] = self.src_images
9581 env.update(_BuildInstanceHookEnv(
9582 name=self.op.instance_name,
9583 primary_node=self.op.pnode,
9584 secondary_nodes=self.secondaries,
9585 status=self.op.start,
9586 os_type=self.op.os_type,
9587 minmem=self.be_full[constants.BE_MINMEM],
9588 maxmem=self.be_full[constants.BE_MAXMEM],
9589 vcpus=self.be_full[constants.BE_VCPUS],
9590 nics=_NICListToTuple(self, self.nics),
9591 disk_template=self.op.disk_template,
9592 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9593 for d in self.disks],
9596 hypervisor_name=self.op.hypervisor,
9602 def BuildHooksNodes(self):
9603 """Build hooks nodes.
9606 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9609 def _ReadExportInfo(self):
9610 """Reads the export information from disk.
9612 It will override the opcode source node and path with the actual
9613 information, if these two were not specified before.
9615 @return: the export information
9618 assert self.op.mode == constants.INSTANCE_IMPORT
9620 src_node = self.op.src_node
9621 src_path = self.op.src_path
9623 if src_node is None:
9624 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9625 exp_list = self.rpc.call_export_list(locked_nodes)
9627 for node in exp_list:
9628 if exp_list[node].fail_msg:
9630 if src_path in exp_list[node].payload:
9632 self.op.src_node = src_node = node
9633 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9637 raise errors.OpPrereqError("No export found for relative path %s" %
9638 src_path, errors.ECODE_INVAL)
9640 _CheckNodeOnline(self, src_node)
9641 result = self.rpc.call_export_info(src_node, src_path)
9642 result.Raise("No export or invalid export found in dir %s" % src_path)
9644 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9645 if not export_info.has_section(constants.INISECT_EXP):
9646 raise errors.ProgrammerError("Corrupted export config",
9647 errors.ECODE_ENVIRON)
9649 ei_version = export_info.get(constants.INISECT_EXP, "version")
9650 if (int(ei_version) != constants.EXPORT_VERSION):
9651 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9652 (ei_version, constants.EXPORT_VERSION),
9653 errors.ECODE_ENVIRON)
9656 def _ReadExportParams(self, einfo):
9657 """Use export parameters as defaults.
9659 If the opcode doesn't specify (i.e. override) some instance
9660 parameters, try to take them from the export information, if present.
9664 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9666 if self.op.disk_template is None:
9667 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9668 self.op.disk_template = einfo.get(constants.INISECT_INS,
9670 if self.op.disk_template not in constants.DISK_TEMPLATES:
9671 raise errors.OpPrereqError("Disk template specified in configuration"
9672 " file is not one of the allowed values:"
9673 " %s" % " ".join(constants.DISK_TEMPLATES))
9675 raise errors.OpPrereqError("No disk template specified and the export"
9676 " is missing the disk_template information",
9679 if not self.op.disks:
9681 # TODO: import the disk iv_name too
9682 for idx in range(constants.MAX_DISKS):
9683 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9684 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9685 disks.append({constants.IDISK_SIZE: disk_sz})
9686 self.op.disks = disks
9687 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9688 raise errors.OpPrereqError("No disk info specified and the export"
9689 " is missing the disk information",
9692 if not self.op.nics:
9694 for idx in range(constants.MAX_NICS):
9695 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9697 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9698 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9705 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9706 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9708 if (self.op.hypervisor is None and
9709 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9710 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9712 if einfo.has_section(constants.INISECT_HYP):
9713 # use the export parameters but do not override the ones
9714 # specified by the user
9715 for name, value in einfo.items(constants.INISECT_HYP):
9716 if name not in self.op.hvparams:
9717 self.op.hvparams[name] = value
9719 if einfo.has_section(constants.INISECT_BEP):
9720 # use the parameters, without overriding
9721 for name, value in einfo.items(constants.INISECT_BEP):
9722 if name not in self.op.beparams:
9723 self.op.beparams[name] = value
9724 # Compatibility for the old "memory" be param
9725 if name == constants.BE_MEMORY:
9726 if constants.BE_MAXMEM not in self.op.beparams:
9727 self.op.beparams[constants.BE_MAXMEM] = value
9728 if constants.BE_MINMEM not in self.op.beparams:
9729 self.op.beparams[constants.BE_MINMEM] = value
9731 # try to read the parameters old style, from the main section
9732 for name in constants.BES_PARAMETERS:
9733 if (name not in self.op.beparams and
9734 einfo.has_option(constants.INISECT_INS, name)):
9735 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9737 if einfo.has_section(constants.INISECT_OSP):
9738 # use the parameters, without overriding
9739 for name, value in einfo.items(constants.INISECT_OSP):
9740 if name not in self.op.osparams:
9741 self.op.osparams[name] = value
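# Illustrative sketch of the export info consumed above (hypothetical values;
# section names shown symbolically, they correspond to the INISECT_* constants
# referenced in this method):
#
#   [<INISECT_INS>]
#   disk_template = drbd
#   disk0_size = 1024
#   nic0_mac = aa:00:00:4c:12:61
#   tags = web production
#   [<INISECT_HYP>]
#   kernel_args = ro
#   [<INISECT_BEP>]
#   memory = 512
#
# Any option already present in the opcode wins; only the missing ones are
# filled in from the export information.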
9743 def _RevertToDefaults(self, cluster):
9744 """Revert the instance parameters to the default values.
9748 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9749 for name in self.op.hvparams.keys():
9750 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9751 del self.op.hvparams[name]
9753 be_defs = cluster.SimpleFillBE({})
9754 for name in self.op.beparams.keys():
9755 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9756 del self.op.beparams[name]
9758 nic_defs = cluster.SimpleFillNIC({})
9759 for nic in self.op.nics:
9760 for name in constants.NICS_PARAMETERS:
9761 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9764 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9765 for name in self.op.osparams.keys():
9766 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9767 del self.op.osparams[name]
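# Illustrative sketch (hypothetical values): with identify_defaults enabled, a
# parameter that merely repeats the current cluster default is dropped again,
# so the instance keeps tracking future changes of that default:
#
#   cluster.SimpleFillBE({})          => {..., "vcpus": 1, ...}
#   self.op.beparams (from import)    == {"vcpus": 1, "maxmem": 512}
#   after _RevertToDefaults(cluster)  => {"maxmem": 512}   # assuming 512 is
#                                                          # not the default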
9769 def _CalculateFileStorageDir(self):
9770 """Calculate final instance file storage dir.
9773 # file storage dir calculation/check
9774 self.instance_file_storage_dir = None
9775 if self.op.disk_template in constants.DTS_FILEBASED:
9776 # build the full file storage dir path
9779 if self.op.disk_template == constants.DT_SHARED_FILE:
9780 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9782 get_fsd_fn = self.cfg.GetFileStorageDir
9784 cfg_storagedir = get_fsd_fn()
9785 if not cfg_storagedir:
9786 raise errors.OpPrereqError("Cluster file storage dir not defined")
9787 joinargs.append(cfg_storagedir)
9789 if self.op.file_storage_dir is not None:
9790 joinargs.append(self.op.file_storage_dir)
9792 joinargs.append(self.op.instance_name)
9794 # pylint: disable=W0142
9795 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
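# Illustrative sketch (hypothetical paths): for a cluster file storage dir of
# "/srv/ganeti/file-storage", an opcode file_storage_dir of "web" and an
# instance named "inst1.example.com", the method above would compute
#
#   self.instance_file_storage_dir ==
#     "/srv/ganeti/file-storage/web/inst1.example.com"
#
# i.e. utils.PathJoin(cfg_storagedir, self.op.file_storage_dir, instance_name).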
9797 def CheckPrereq(self): # pylint: disable=R0914
9798 """Check prerequisites.
9801 self._CalculateFileStorageDir()
9803 if self.op.mode == constants.INSTANCE_IMPORT:
9804 export_info = self._ReadExportInfo()
9805 self._ReadExportParams(export_info)
9806 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9808 self._old_instance_name = None
9810 if (not self.cfg.GetVGName() and
9811 self.op.disk_template not in constants.DTS_NOT_LVM):
9812 raise errors.OpPrereqError("Cluster does not support lvm-based"
9813 " instances", errors.ECODE_STATE)
9815 if (self.op.hypervisor is None or
9816 self.op.hypervisor == constants.VALUE_AUTO):
9817 self.op.hypervisor = self.cfg.GetHypervisorType()
9819 cluster = self.cfg.GetClusterInfo()
9820 enabled_hvs = cluster.enabled_hypervisors
9821 if self.op.hypervisor not in enabled_hvs:
9822 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9823 " cluster (%s)" % (self.op.hypervisor,
9824 ",".join(enabled_hvs)),
9827 # Check tag validity
9828 for tag in self.op.tags:
9829 objects.TaggableObject.ValidateTag(tag)
9831 # check hypervisor parameter syntax (locally)
9832 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9833 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9835 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9836 hv_type.CheckParameterSyntax(filled_hvp)
9837 self.hv_full = filled_hvp
9838 # check that we don't specify global parameters on an instance
9839 _CheckGlobalHvParams(self.op.hvparams)
9841 # fill and remember the beparams dict
9842 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9843 for param, value in self.op.beparams.iteritems():
9844 if value == constants.VALUE_AUTO:
9845 self.op.beparams[param] = default_beparams[param]
9846 objects.UpgradeBeParams(self.op.beparams)
9847 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9848 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9850 # build os parameters
9851 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9853 # now that hvp/bep are in final format, let's reset to defaults,
9855 if self.op.identify_defaults:
9856 self._RevertToDefaults(cluster)
9860 for idx, nic in enumerate(self.op.nics):
9861 nic_mode_req = nic.get(constants.INIC_MODE, None)
9862 nic_mode = nic_mode_req
9863 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9864 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9866 # in routed mode, for the first nic, the default ip is 'auto'
9867 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9868 default_ip_mode = constants.VALUE_AUTO
9870 default_ip_mode = constants.VALUE_NONE
9872 # ip validity checks
9873 ip = nic.get(constants.INIC_IP, default_ip_mode)
9874 if ip is None or ip.lower() == constants.VALUE_NONE:
9876 elif ip.lower() == constants.VALUE_AUTO:
9877 if not self.op.name_check:
9878 raise errors.OpPrereqError("IP address set to auto but name checks"
9879 " have been skipped",
9881 nic_ip = self.hostname1.ip
9883 if not netutils.IPAddress.IsValid(ip):
9884 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9888 # TODO: check the ip address for uniqueness
9889 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9890 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9893 # MAC address verification
9894 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9895 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9896 mac = utils.NormalizeAndValidateMac(mac)
9899 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9900 except errors.ReservationError:
9901 raise errors.OpPrereqError("MAC address %s already in use"
9902 " in cluster" % mac,
9903 errors.ECODE_NOTUNIQUE)
9905 # Build nic parameters
9906 link = nic.get(constants.INIC_LINK, None)
9907 if link == constants.VALUE_AUTO:
9908 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9911 nicparams[constants.NIC_MODE] = nic_mode
9913 nicparams[constants.NIC_LINK] = link
9915 check_params = cluster.SimpleFillNIC(nicparams)
9916 objects.NIC.CheckParameterSyntax(check_params)
9917 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9919 # disk checks/pre-build
9920 default_vg = self.cfg.GetVGName()
9922 for disk in self.op.disks:
9923 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9924 if mode not in constants.DISK_ACCESS_SET:
9925 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9926 mode, errors.ECODE_INVAL)
9927 size = disk.get(constants.IDISK_SIZE, None)
9929 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9932 except (TypeError, ValueError):
9933 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9936 data_vg = disk.get(constants.IDISK_VG, default_vg)
9938 constants.IDISK_SIZE: size,
9939 constants.IDISK_MODE: mode,
9940 constants.IDISK_VG: data_vg,
9942 if constants.IDISK_METAVG in disk:
9943 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9944 if constants.IDISK_ADOPT in disk:
9945 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9946 self.disks.append(new_disk)
9948 if self.op.mode == constants.INSTANCE_IMPORT:
9950 for idx in range(len(self.disks)):
9951 option = "disk%d_dump" % idx
9952 if export_info.has_option(constants.INISECT_INS, option):
9953 # FIXME: are the old os-es, disk sizes, etc. useful?
9954 export_name = export_info.get(constants.INISECT_INS, option)
9955 image = utils.PathJoin(self.op.src_path, export_name)
9956 disk_images.append(image)
9958 disk_images.append(False)
9960 self.src_images = disk_images
9962 if self.op.instance_name == self._old_instance_name:
9963 for idx, nic in enumerate(self.nics):
9964 if nic.mac == constants.VALUE_AUTO:
9965 nic_mac_ini = "nic%d_mac" % idx
9966 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9968 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9970 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9971 if self.op.ip_check:
9972 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9973 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9974 (self.check_ip, self.op.instance_name),
9975 errors.ECODE_NOTUNIQUE)
9977 #### mac address generation
9978 # By generating the MAC address here, both the allocator and the hooks get
9979 # the real, final MAC address rather than the 'auto' or 'generate' value.
9980 # There is a race condition between the generation and the instance object
9981 # creation, which means that we know the mac is valid now, but we're not
9982 # sure it will be when we actually add the instance. If things go bad
9983 # adding the instance will abort because of a duplicate mac, and the
9984 # creation job will fail.
9985 for nic in self.nics:
9986 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9987 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9991 if self.op.iallocator is not None:
9992 self._RunAllocator()
9994 # Release all unneeded node locks
9995 _ReleaseLocks(self, locking.LEVEL_NODE,
9996 keep=filter(None, [self.op.pnode, self.op.snode,
9998 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9999 keep=filter(None, [self.op.pnode, self.op.snode,
10000 self.op.src_node]))
10002 #### node related checks
10004 # check primary node
10005 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10006 assert self.pnode is not None, \
10007 "Cannot retrieve locked node %s" % self.op.pnode
10009 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10010 pnode.name, errors.ECODE_STATE)
10012 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10013 pnode.name, errors.ECODE_STATE)
10014 if not pnode.vm_capable:
10015 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10016 " '%s'" % pnode.name, errors.ECODE_STATE)
10018 self.secondaries = []
10020 # mirror node verification
10021 if self.op.disk_template in constants.DTS_INT_MIRROR:
10022 if self.op.snode == pnode.name:
10023 raise errors.OpPrereqError("The secondary node cannot be the"
10024 " primary node", errors.ECODE_INVAL)
10025 _CheckNodeOnline(self, self.op.snode)
10026 _CheckNodeNotDrained(self, self.op.snode)
10027 _CheckNodeVmCapable(self, self.op.snode)
10028 self.secondaries.append(self.op.snode)
10030 snode = self.cfg.GetNodeInfo(self.op.snode)
10031 if pnode.group != snode.group:
10032 self.LogWarning("The primary and secondary nodes are in two"
10033 " different node groups; the disk parameters"
10034 " from the first disk's node group will be"
10037 nodenames = [pnode.name] + self.secondaries
10039 # Verify instance specs
10040 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10042 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10043 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10044 constants.ISPEC_DISK_COUNT: len(self.disks),
10045 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10046 constants.ISPEC_NIC_COUNT: len(self.nics),
10047 constants.ISPEC_SPINDLE_USE: spindle_use,
10050 group_info = self.cfg.GetNodeGroup(pnode.group)
10051 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
10052 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10053 if not self.op.ignore_ipolicy and res:
10054 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10055 " policy: %s") % (pnode.group,
10056 utils.CommaJoin(res)),
10057 errors.ECODE_INVAL)
10059 if not self.adopt_disks:
10060 if self.op.disk_template == constants.DT_RBD:
10061 # _CheckRADOSFreeSpace() is just a placeholder.
10062 # Any function that checks prerequisites can be placed here.
10063 # Check if there is enough space on the RADOS cluster.
10064 _CheckRADOSFreeSpace()
10066 # Check lv size requirements, if not adopting
10067 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10068 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10070 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10071 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10072 disk[constants.IDISK_ADOPT])
10073 for disk in self.disks])
10074 if len(all_lvs) != len(self.disks):
10075 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10076 errors.ECODE_INVAL)
10077 for lv_name in all_lvs:
10079 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10080 # to ReserveLV uses the same syntax
10081 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10082 except errors.ReservationError:
10083 raise errors.OpPrereqError("LV named %s used by another instance" %
10084 lv_name, errors.ECODE_NOTUNIQUE)
10086 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10087 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10089 node_lvs = self.rpc.call_lv_list([pnode.name],
10090 vg_names.payload.keys())[pnode.name]
10091 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10092 node_lvs = node_lvs.payload
10094 delta = all_lvs.difference(node_lvs.keys())
10096 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10097 utils.CommaJoin(delta),
10098 errors.ECODE_INVAL)
10099 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10101 raise errors.OpPrereqError("Online logical volumes found, cannot"
10102 " adopt: %s" % utils.CommaJoin(online_lvs),
10103 errors.ECODE_STATE)
10104 # update the size of each disk based on what was found
10105 for dsk in self.disks:
10106 dsk[constants.IDISK_SIZE] = \
10107 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10108 dsk[constants.IDISK_ADOPT])][0]))
10110 elif self.op.disk_template == constants.DT_BLOCK:
10111 # Normalize and de-duplicate device paths
10112 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10113 for disk in self.disks])
10114 if len(all_disks) != len(self.disks):
10115 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10116 errors.ECODE_INVAL)
10117 baddisks = [d for d in all_disks
10118 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10120 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10121 " cannot be adopted" %
10122 (", ".join(baddisks),
10123 constants.ADOPTABLE_BLOCKDEV_ROOT),
10124 errors.ECODE_INVAL)
10126 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10127 list(all_disks))[pnode.name]
10128 node_disks.Raise("Cannot get block device information from node %s" %
10130 node_disks = node_disks.payload
10131 delta = all_disks.difference(node_disks.keys())
10133 raise errors.OpPrereqError("Missing block device(s): %s" %
10134 utils.CommaJoin(delta),
10135 errors.ECODE_INVAL)
10136 for dsk in self.disks:
10137 dsk[constants.IDISK_SIZE] = \
10138 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10140 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10142 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10143 # check OS parameters (remotely)
10144 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10146 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10148 # memory check on primary node
10149 #TODO(dynmem): use MINMEM for checking
10151 _CheckNodeFreeMemory(self, self.pnode.name,
10152 "creating instance %s" % self.op.instance_name,
10153 self.be_full[constants.BE_MAXMEM],
10154 self.op.hypervisor)
10156 self.dry_run_result = list(nodenames)
10158 def Exec(self, feedback_fn):
10159 """Create and add the instance to the cluster.
10162 instance = self.op.instance_name
10163 pnode_name = self.pnode.name
10165 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10166 self.owned_locks(locking.LEVEL_NODE)), \
10167 "Node locks differ from node resource locks"
10169 ht_kind = self.op.hypervisor
10170 if ht_kind in constants.HTS_REQ_PORT:
10171 network_port = self.cfg.AllocatePort()
10173 network_port = None
10175 # This is ugly, but we have a chicken-and-egg problem here:
10176 # We can only take the group disk parameters, as the instance
10177 # has no disks yet (we are generating them right here).
10178 node = self.cfg.GetNodeInfo(pnode_name)
10179 nodegroup = self.cfg.GetNodeGroup(node.group)
10180 disks = _GenerateDiskTemplate(self,
10181 self.op.disk_template,
10182 instance, pnode_name,
10185 self.instance_file_storage_dir,
10186 self.op.file_driver,
10189 self.cfg.GetGroupDiskParams(nodegroup))
10191 iobj = objects.Instance(name=instance, os=self.op.os_type,
10192 primary_node=pnode_name,
10193 nics=self.nics, disks=disks,
10194 disk_template=self.op.disk_template,
10195 admin_state=constants.ADMINST_DOWN,
10196 network_port=network_port,
10197 beparams=self.op.beparams,
10198 hvparams=self.op.hvparams,
10199 hypervisor=self.op.hypervisor,
10200 osparams=self.op.osparams,
10204 for tag in self.op.tags:
10207 if self.adopt_disks:
10208 if self.op.disk_template == constants.DT_PLAIN:
10209 # rename LVs to the newly-generated names; we need to construct
10210 # 'fake' LV disks with the old data, plus the new unique_id
10211 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10213 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10214 rename_to.append(t_dsk.logical_id)
10215 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10216 self.cfg.SetDiskID(t_dsk, pnode_name)
10217 result = self.rpc.call_blockdev_rename(pnode_name,
10218 zip(tmp_disks, rename_to))
10219 result.Raise("Failed to rename adopted LVs")
10221 feedback_fn("* creating instance disks...")
10223 _CreateDisks(self, iobj)
10224 except errors.OpExecError:
10225 self.LogWarning("Device creation failed, reverting...")
10227 _RemoveDisks(self, iobj)
10229 self.cfg.ReleaseDRBDMinors(instance)
10232 feedback_fn("adding instance %s to cluster config" % instance)
10234 self.cfg.AddInstance(iobj, self.proc.GetECId())
10236 # Declare that we don't want to remove the instance lock anymore, as we've
10237 # added the instance to the config
10238 del self.remove_locks[locking.LEVEL_INSTANCE]
10240 if self.op.mode == constants.INSTANCE_IMPORT:
10241 # Release unused nodes
10242 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10244 # Release all nodes
10245 _ReleaseLocks(self, locking.LEVEL_NODE)
10248 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10249 feedback_fn("* wiping instance disks...")
10251 _WipeDisks(self, iobj)
10252 except errors.OpExecError, err:
10253 logging.exception("Wiping disks failed")
10254 self.LogWarning("Wiping instance disks failed (%s)", err)
10258 # Something is already wrong with the disks, don't do anything else
10260 elif self.op.wait_for_sync:
10261 disk_abort = not _WaitForSync(self, iobj)
10262 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10263 # make sure the disks are not degraded (still sync-ing is ok)
10264 feedback_fn("* checking mirrors status")
10265 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10270 _RemoveDisks(self, iobj)
10271 self.cfg.RemoveInstance(iobj.name)
10272 # Make sure the instance lock gets removed
10273 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10274 raise errors.OpExecError("There are some degraded disks for"
10277 # Release all node resource locks
10278 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10280 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10281 # we need to set the disks' IDs to the primary node, since the
10282 # preceding code might or might not have done it, depending on
10283 # disk template and other options
10284 for disk in iobj.disks:
10285 self.cfg.SetDiskID(disk, pnode_name)
10286 if self.op.mode == constants.INSTANCE_CREATE:
10287 if not self.op.no_install:
10288 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10289 not self.op.wait_for_sync)
10291 feedback_fn("* pausing disk sync to install instance OS")
10292 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10295 for idx, success in enumerate(result.payload):
10297 logging.warn("pause-sync of instance %s for disk %d failed",
10300 feedback_fn("* running the instance OS create scripts...")
10301 # FIXME: pass debug option from opcode to backend
10303 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10304 self.op.debug_level)
10306 feedback_fn("* resuming disk sync")
10307 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10310 for idx, success in enumerate(result.payload):
10312 logging.warn("resume-sync of instance %s for disk %d failed",
10315 os_add_result.Raise("Could not add os for instance %s"
10316 " on node %s" % (instance, pnode_name))
10319 if self.op.mode == constants.INSTANCE_IMPORT:
10320 feedback_fn("* running the instance OS import scripts...")
10324 for idx, image in enumerate(self.src_images):
10328 # FIXME: pass debug option from opcode to backend
10329 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10330 constants.IEIO_FILE, (image, ),
10331 constants.IEIO_SCRIPT,
10332 (iobj.disks[idx], idx),
10334 transfers.append(dt)
10337 masterd.instance.TransferInstanceData(self, feedback_fn,
10338 self.op.src_node, pnode_name,
10339 self.pnode.secondary_ip,
10341 if not compat.all(import_result):
10342 self.LogWarning("Some disks for instance %s on node %s were not"
10343 " imported successfully" % (instance, pnode_name))
10345 rename_from = self._old_instance_name
10347 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10348 feedback_fn("* preparing remote import...")
10349 # The source cluster will stop the instance before attempting to make
10350 # a connection. In some cases stopping an instance can take a long
10351 # time, hence the shutdown timeout is added to the connection timeout.
10353 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10354 self.op.source_shutdown_timeout)
10355 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10357 assert iobj.primary_node == self.pnode.name
10359 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10360 self.source_x509_ca,
10361 self._cds, timeouts)
10362 if not compat.all(disk_results):
10363 # TODO: Should the instance still be started, even if some disks
10364 # failed to import (valid for local imports, too)?
10365 self.LogWarning("Some disks for instance %s on node %s were not"
10366 " imported successfully" % (instance, pnode_name))
10368 rename_from = self.source_instance_name
10371 # also checked in the prereq part
10372 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10375 # Run rename script on newly imported instance
10376 assert iobj.name == instance
10377 feedback_fn("Running rename script for %s" % instance)
10378 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10380 self.op.debug_level)
10381 if result.fail_msg:
10382 self.LogWarning("Failed to run rename script for %s on node"
10383 " %s: %s" % (instance, pnode_name, result.fail_msg))
10385 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10388 iobj.admin_state = constants.ADMINST_UP
10389 self.cfg.Update(iobj, feedback_fn)
10390 logging.info("Starting instance %s on node %s", instance, pnode_name)
10391 feedback_fn("* starting instance...")
10392 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10394 result.Raise("Could not start instance")
10396 return list(iobj.all_nodes)
10399 def _CheckRADOSFreeSpace():
10400 """Compute disk size requirements inside the RADOS cluster.
10403 # For the RADOS cluster we assume there is always enough space.
10407 class LUInstanceConsole(NoHooksLU):
10408 """Connect to an instance's console.
10410 This is somewhat special in that it returns the command line that
10411 you need to run on the master node in order to connect to the console.
10417 def ExpandNames(self):
10418 self.share_locks = _ShareAll()
10419 self._ExpandAndLockInstance()
10421 def CheckPrereq(self):
10422 """Check prerequisites.
10424 This checks that the instance is in the cluster.
10427 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10428 assert self.instance is not None, \
10429 "Cannot retrieve locked instance %s" % self.op.instance_name
10430 _CheckNodeOnline(self, self.instance.primary_node)
10432 def Exec(self, feedback_fn):
10433 """Connect to the console of an instance
10436 instance = self.instance
10437 node = instance.primary_node
10439 node_insts = self.rpc.call_instance_list([node],
10440 [instance.hypervisor])[node]
10441 node_insts.Raise("Can't get node information from %s" % node)
10443 if instance.name not in node_insts.payload:
10444 if instance.admin_state == constants.ADMINST_UP:
10445 state = constants.INSTST_ERRORDOWN
10446 elif instance.admin_state == constants.ADMINST_DOWN:
10447 state = constants.INSTST_ADMINDOWN
10449 state = constants.INSTST_ADMINOFFLINE
10450 raise errors.OpExecError("Instance %s is not running (state %s)" %
10451 (instance.name, state))
10453 logging.debug("Connecting to console of %s on %s", instance.name, node)
10455 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10458 def _GetInstanceConsole(cluster, instance):
10459 """Returns console information for an instance.
10461 @type cluster: L{objects.Cluster}
10462 @type instance: L{objects.Instance}
10466 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10467 # beparams and hvparams are passed separately, to avoid editing the
10468 # instance and then saving the defaults in the instance itself.
10469 hvparams = cluster.FillHV(instance)
10470 beparams = cluster.FillBE(instance)
10471 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10473 assert console.instance == instance.name
10474 assert console.Validate()
10476 return console.ToDict()
10479 class LUInstanceReplaceDisks(LogicalUnit):
10480 """Replace the disks of an instance.
10483 HPATH = "mirrors-replace"
10484 HTYPE = constants.HTYPE_INSTANCE
10487 def CheckArguments(self):
10488 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10489 self.op.iallocator)
10491 def ExpandNames(self):
10492 self._ExpandAndLockInstance()
10494 assert locking.LEVEL_NODE not in self.needed_locks
10495 assert locking.LEVEL_NODE_RES not in self.needed_locks
10496 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10498 assert self.op.iallocator is None or self.op.remote_node is None, \
10499 "Conflicting options"
10501 if self.op.remote_node is not None:
10502 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10504 # Warning: do not remove the locking of the new secondary here
10505 # unless DRBD8.AddChildren is changed to work in parallel;
10506 # currently it doesn't since parallel invocations of
10507 # FindUnusedMinor will conflict
10508 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10509 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10511 self.needed_locks[locking.LEVEL_NODE] = []
10512 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10514 if self.op.iallocator is not None:
10515 # iallocator will select a new node in the same group
10516 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10518 self.needed_locks[locking.LEVEL_NODE_RES] = []
10520 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10521 self.op.iallocator, self.op.remote_node,
10522 self.op.disks, False, self.op.early_release,
10523 self.op.ignore_ipolicy)
10525 self.tasklets = [self.replacer]
10527 def DeclareLocks(self, level):
10528 if level == locking.LEVEL_NODEGROUP:
10529 assert self.op.remote_node is None
10530 assert self.op.iallocator is not None
10531 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10533 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10534 # Lock all groups used by instance optimistically; this requires going
10535 # via the node before it's locked, requiring verification later on
10536 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10537 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10539 elif level == locking.LEVEL_NODE:
10540 if self.op.iallocator is not None:
10541 assert self.op.remote_node is None
10542 assert not self.needed_locks[locking.LEVEL_NODE]
10544 # Lock member nodes of all locked groups
10545 self.needed_locks[locking.LEVEL_NODE] = [node_name
10546 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10547 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10549 self._LockInstancesNodes()
10550 elif level == locking.LEVEL_NODE_RES:
10552 self.needed_locks[locking.LEVEL_NODE_RES] = \
10553 self.needed_locks[locking.LEVEL_NODE]
10555 def BuildHooksEnv(self):
10556 """Build hooks env.
10558 This runs on the master, the primary and all the secondaries.
10561 instance = self.replacer.instance
10563 "MODE": self.op.mode,
10564 "NEW_SECONDARY": self.op.remote_node,
10565 "OLD_SECONDARY": instance.secondary_nodes[0],
10567 env.update(_BuildInstanceHookEnvByObject(self, instance))
10570 def BuildHooksNodes(self):
10571 """Build hooks nodes.
10574 instance = self.replacer.instance
10576 self.cfg.GetMasterNode(),
10577 instance.primary_node,
10579 if self.op.remote_node is not None:
10580 nl.append(self.op.remote_node)
10583 def CheckPrereq(self):
10584 """Check prerequisites.
10587 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10588 self.op.iallocator is None)
10590 # Verify if node group locks are still correct
10591 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10593 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10595 return LogicalUnit.CheckPrereq(self)
10598 class TLReplaceDisks(Tasklet):
10599 """Replaces disks for an instance.
10601 Note: Locking is not within the scope of this class.
10604 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10605 disks, delay_iallocator, early_release, ignore_ipolicy):
10606 """Initializes this class.
10609 Tasklet.__init__(self, lu)
10612 self.instance_name = instance_name
10614 self.iallocator_name = iallocator_name
10615 self.remote_node = remote_node
10617 self.delay_iallocator = delay_iallocator
10618 self.early_release = early_release
10619 self.ignore_ipolicy = ignore_ipolicy
10622 self.instance = None
10623 self.new_node = None
10624 self.target_node = None
10625 self.other_node = None
10626 self.remote_node_info = None
10627 self.node_secondary_ip = None
10630 def CheckArguments(mode, remote_node, iallocator):
10631 """Helper function for users of this class.
10634 # check for valid parameter combination
10635 if mode == constants.REPLACE_DISK_CHG:
10636 if remote_node is None and iallocator is None:
10637 raise errors.OpPrereqError("When changing the secondary either an"
10638 " iallocator script must be used or the"
10639 " new node given", errors.ECODE_INVAL)
10641 if remote_node is not None and iallocator is not None:
10642 raise errors.OpPrereqError("Give either the iallocator or the new"
10643 " secondary, not both", errors.ECODE_INVAL)
10645 elif remote_node is not None or iallocator is not None:
10646 # Not replacing the secondary
10647 raise errors.OpPrereqError("The iallocator and new node options can"
10648 " only be used when changing the"
10649 " secondary node", errors.ECODE_INVAL)
10652 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10653 """Compute a new secondary node using an IAllocator.
10656 ial = IAllocator(lu.cfg, lu.rpc,
10657 mode=constants.IALLOCATOR_MODE_RELOC,
10658 name=instance_name,
10659 relocate_from=list(relocate_from))
10661 ial.Run(iallocator_name)
10663 if not ial.success:
10664 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10665 " %s" % (iallocator_name, ial.info),
10666 errors.ECODE_NORES)
10668 if len(ial.result) != ial.required_nodes:
10669 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10670 " of nodes (%s), required %s" %
10672 len(ial.result), ial.required_nodes),
10673 errors.ECODE_FAULT)
10675 remote_node_name = ial.result[0]
10677 lu.LogInfo("Selected new secondary for instance '%s': %s",
10678 instance_name, remote_node_name)
10680 return remote_node_name
10682 def _FindFaultyDisks(self, node_name):
10683 """Wrapper for L{_FindFaultyInstanceDisks}.
10686 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10689 def _CheckDisksActivated(self, instance):
10690 """Checks if the instance disks are activated.
10692 @param instance: the instance whose disks we should check
10693 @return: True if they are activated, False otherwise
10696 nodes = instance.all_nodes
10698 for idx, dev in enumerate(instance.disks):
10700 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10701 self.cfg.SetDiskID(dev, node)
10703 result = _BlockdevFind(self, node, dev, instance)
10707 elif result.fail_msg or not result.payload:
10712 def CheckPrereq(self):
10713 """Check prerequisites.
10715 This checks that the instance is in the cluster.
10718 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10719 assert instance is not None, \
10720 "Cannot retrieve locked instance %s" % self.instance_name
10722 if instance.disk_template != constants.DT_DRBD8:
10723 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10724 " instances", errors.ECODE_INVAL)
10726 if len(instance.secondary_nodes) != 1:
10727 raise errors.OpPrereqError("The instance has a strange layout,"
10728 " expected one secondary but found %d" %
10729 len(instance.secondary_nodes),
10730 errors.ECODE_FAULT)
10732 if not self.delay_iallocator:
10733 self._CheckPrereq2()
10735 def _CheckPrereq2(self):
10736 """Check prerequisites, second part.
10738 This function should always be part of CheckPrereq. It was separated and is
10739 now called from Exec because during node evacuation iallocator was only
10740 called with an unmodified cluster model, not taking planned changes into account.
10744 instance = self.instance
10745 secondary_node = instance.secondary_nodes[0]
10747 if self.iallocator_name is None:
10748 remote_node = self.remote_node
10750 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10751 instance.name, instance.secondary_nodes)
10753 if remote_node is None:
10754 self.remote_node_info = None
10756 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10757 "Remote node '%s' is not locked" % remote_node
10759 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10760 assert self.remote_node_info is not None, \
10761 "Cannot retrieve locked node %s" % remote_node
10763 if remote_node == self.instance.primary_node:
10764 raise errors.OpPrereqError("The specified node is the primary node of"
10765 " the instance", errors.ECODE_INVAL)
10767 if remote_node == secondary_node:
10768 raise errors.OpPrereqError("The specified node is already the"
10769 " secondary node of the instance",
10770 errors.ECODE_INVAL)
10772 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10773 constants.REPLACE_DISK_CHG):
10774 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10775 errors.ECODE_INVAL)
10777 if self.mode == constants.REPLACE_DISK_AUTO:
10778 if not self._CheckDisksActivated(instance):
10779 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10780 " first" % self.instance_name,
10781 errors.ECODE_STATE)
10782 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10783 faulty_secondary = self._FindFaultyDisks(secondary_node)
10785 if faulty_primary and faulty_secondary:
10786 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10787 " one node and can not be repaired"
10788 " automatically" % self.instance_name,
10789 errors.ECODE_STATE)
10792 self.disks = faulty_primary
10793 self.target_node = instance.primary_node
10794 self.other_node = secondary_node
10795 check_nodes = [self.target_node, self.other_node]
10796 elif faulty_secondary:
10797 self.disks = faulty_secondary
10798 self.target_node = secondary_node
10799 self.other_node = instance.primary_node
10800 check_nodes = [self.target_node, self.other_node]
10806 # Non-automatic modes
10807 if self.mode == constants.REPLACE_DISK_PRI:
10808 self.target_node = instance.primary_node
10809 self.other_node = secondary_node
10810 check_nodes = [self.target_node, self.other_node]
10812 elif self.mode == constants.REPLACE_DISK_SEC:
10813 self.target_node = secondary_node
10814 self.other_node = instance.primary_node
10815 check_nodes = [self.target_node, self.other_node]
10817 elif self.mode == constants.REPLACE_DISK_CHG:
10818 self.new_node = remote_node
10819 self.other_node = instance.primary_node
10820 self.target_node = secondary_node
10821 check_nodes = [self.new_node, self.other_node]
10823 _CheckNodeNotDrained(self.lu, remote_node)
10824 _CheckNodeVmCapable(self.lu, remote_node)
10826 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10827 assert old_node_info is not None
10828 if old_node_info.offline and not self.early_release:
10829 # doesn't make sense to delay the release
10830 self.early_release = True
10831 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10832 " early-release mode", secondary_node)
10835 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10838 # If not specified all disks should be replaced
10840 self.disks = range(len(self.instance.disks))
10842 # TODO: This is ugly, but right now we can't distinguish between an
10843 # internally submitted opcode and an external one. We should fix that.
10844 if self.remote_node_info:
10845 # We change the node, let's verify it still meets the instance policy
10846 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10847 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10849 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10850 ignore=self.ignore_ipolicy)
10852 for node in check_nodes:
10853 _CheckNodeOnline(self.lu, node)
10855 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10858 if node_name is not None)
10860 # Release unneeded node and node resource locks
10861 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10862 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10864 # Release any owned node group
10865 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10866 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10868 # Check whether disks are valid
10869 for disk_idx in self.disks:
10870 instance.FindDisk(disk_idx)
10872 # Get secondary node IP addresses
10873 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10874 in self.cfg.GetMultiNodeInfo(touched_nodes))
10876 def Exec(self, feedback_fn):
10877 """Execute disk replacement.
10879 This dispatches the disk replacement to the appropriate handler.
10882 if self.delay_iallocator:
10883 self._CheckPrereq2()
10886 # Verify owned locks before starting operation
10887 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10888 assert set(owned_nodes) == set(self.node_secondary_ip), \
10889 ("Incorrect node locks, owning %s, expected %s" %
10890 (owned_nodes, self.node_secondary_ip.keys()))
10891 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10892 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10894 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10895 assert list(owned_instances) == [self.instance_name], \
10896 "Instance '%s' not locked" % self.instance_name
10898 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10899 "Should not own any node group lock at this point"
10902 feedback_fn("No disks need replacement")
10905 feedback_fn("Replacing disk(s) %s for %s" %
10906 (utils.CommaJoin(self.disks), self.instance.name))
10908 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10910 # Activate the instance disks if we're replacing them on a down instance
10912 _StartInstanceDisks(self.lu, self.instance, True)
10915 # Should we replace the secondary node?
10916 if self.new_node is not None:
10917 fn = self._ExecDrbd8Secondary
10919 fn = self._ExecDrbd8DiskOnly
10921 result = fn(feedback_fn)
10923 # Deactivate the instance disks if we're replacing them on a down instance
10926 _SafeShutdownInstanceDisks(self.lu, self.instance)
10928 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10931 # Verify owned locks
10932 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10933 nodes = frozenset(self.node_secondary_ip)
10934 assert ((self.early_release and not owned_nodes) or
10935 (not self.early_release and not (set(owned_nodes) - nodes))), \
10936 ("Not owning the correct locks, early_release=%s, owned=%r,"
10937 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10941 def _CheckVolumeGroup(self, nodes):
10942 self.lu.LogInfo("Checking volume groups")
10944 vgname = self.cfg.GetVGName()
10946 # Make sure volume group exists on all involved nodes
10947 results = self.rpc.call_vg_list(nodes)
10949 raise errors.OpExecError("Can't list volume groups on the nodes")
10952 res = results[node]
10953 res.Raise("Error checking node %s" % node)
10954 if vgname not in res.payload:
10955 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10958 def _CheckDisksExistence(self, nodes):
10959 # Check disk existence
10960 for idx, dev in enumerate(self.instance.disks):
10961 if idx not in self.disks:
10965 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10966 self.cfg.SetDiskID(dev, node)
10968 result = _BlockdevFind(self, node, dev, self.instance)
10970 msg = result.fail_msg
10971 if msg or not result.payload:
10973 msg = "disk not found"
10974 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10977 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10978 for idx, dev in enumerate(self.instance.disks):
10979 if idx not in self.disks:
10982 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10985 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10986 on_primary, ldisk=ldisk):
10987 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10988 " replace disks for instance %s" %
10989 (node_name, self.instance.name))
10991 def _CreateNewStorage(self, node_name):
10992 """Create new storage on the primary or secondary node.
10994 This is only used for same-node replaces, not for changing the
10995 secondary node, hence we don't want to modify the existing disk.
11000 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11001 for idx, dev in enumerate(disks):
11002 if idx not in self.disks:
11005 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11007 self.cfg.SetDiskID(dev, node_name)
11009 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11010 names = _GenerateUniqueNames(self.lu, lv_names)
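      # For illustration (hypothetical names): for disk 0 this builds the
      # suffixes ".disk0_data" and ".disk0_meta"; _GenerateUniqueNames then
      # prepends a generated unique ID, yielding LV names roughly of the
      # form "<uuid>.disk0_data" and "<uuid>.disk0_meta".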
11012 (data_disk, meta_disk) = dev.children
11013 vg_data = data_disk.logical_id[0]
11014 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11015 logical_id=(vg_data, names[0]),
11016 params=data_disk.params)
11017 vg_meta = meta_disk.logical_id[0]
11018 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
11019 logical_id=(vg_meta, names[1]),
11020 params=meta_disk.params)
11022 new_lvs = [lv_data, lv_meta]
11023 old_lvs = [child.Copy() for child in dev.children]
11024 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11026 # we pass force_create=True to force the LVM creation
11027 for new_lv in new_lvs:
11028 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11029 _GetInstanceInfoText(self.instance), False)
11033 def _CheckDevices(self, node_name, iv_names):
11034 for name, (dev, _, _) in iv_names.iteritems():
11035 self.cfg.SetDiskID(dev, node_name)
11037 result = _BlockdevFind(self, node_name, dev, self.instance)
11039 msg = result.fail_msg
11040 if msg or not result.payload:
11042 msg = "disk not found"
11043 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11046 if result.payload.is_degraded:
11047 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11049 def _RemoveOldStorage(self, node_name, iv_names):
11050 for name, (_, old_lvs, _) in iv_names.iteritems():
11051 self.lu.LogInfo("Remove logical volumes for %s" % name)
11054 self.cfg.SetDiskID(lv, node_name)
11056 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11058 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11059 hint="remove unused LVs manually")
11061 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11062 """Replace a disk on the primary or secondary for DRBD 8.
11064 The algorithm for replace is quite complicated:
11066 1. for each disk to be replaced:
11068 1. create new LVs on the target node with unique names
11069 1. detach old LVs from the drbd device
11070 1. rename old LVs to name_replaced.<time_t>
11071 1. rename new LVs to old LVs
11072 1. attach the new LVs (with the old names now) to the drbd device
11074 1. wait for sync across all devices
11076 1. for each modified disk:
11078 1. remove old LVs (which have the name name_replaced.<time_t>)
11080 Failures are not very well handled.
11085 # Step: check device activation
11086 self.lu.LogStep(1, steps_total, "Check device existence")
11087 self._CheckDisksExistence([self.other_node, self.target_node])
11088 self._CheckVolumeGroup([self.target_node, self.other_node])
11090 # Step: check other node consistency
11091 self.lu.LogStep(2, steps_total, "Check peer consistency")
11092 self._CheckDisksConsistency(self.other_node,
11093 self.other_node == self.instance.primary_node,
11096 # Step: create new storage
11097 self.lu.LogStep(3, steps_total, "Allocate new storage")
11098 iv_names = self._CreateNewStorage(self.target_node)
11100 # Step: for each lv, detach+rename*2+attach
11101 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11102 for dev, old_lvs, new_lvs in iv_names.itervalues():
11103 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11105 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11107 result.Raise("Can't detach drbd from local storage on node"
11108 " %s for device %s" % (self.target_node, dev.iv_name))
11110 #cfg.Update(instance)
11112 # ok, we created the new LVs, so now we know we have the needed
11113 # storage; as such, we proceed on the target node to rename
11114 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11115 # using the assumption that logical_id == physical_id (which in
11116 # turn is the unique_id on that node)
11118 # FIXME(iustin): use a better name for the replaced LVs
11119 temp_suffix = int(time.time())
11120 ren_fn = lambda d, suff: (d.physical_id[0],
11121 d.physical_id[1] + "_replaced-%s" % suff)
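      # For illustration (hypothetical values): with temp_suffix 1400000000,
      # an old LV with physical_id ("xenvg", "abc123.disk0_data") is renamed
      # to ("xenvg", "abc123.disk0_data_replaced-1400000000"), freeing its
      # original name for the corresponding new LV.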
11123 # Build the rename list based on what LVs exist on the node
11124 rename_old_to_new = []
11125 for to_ren in old_lvs:
11126 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11127 if not result.fail_msg and result.payload:
11129 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11131 self.lu.LogInfo("Renaming the old LVs on the target node")
11132 result = self.rpc.call_blockdev_rename(self.target_node,
11134 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11136 # Now we rename the new LVs to the old LVs
11137 self.lu.LogInfo("Renaming the new LVs on the target node")
11138 rename_new_to_old = [(new, old.physical_id)
11139 for old, new in zip(old_lvs, new_lvs)]
11140 result = self.rpc.call_blockdev_rename(self.target_node,
11142 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11144 # Intermediate steps of in memory modifications
11145 for old, new in zip(old_lvs, new_lvs):
11146 new.logical_id = old.logical_id
11147 self.cfg.SetDiskID(new, self.target_node)
11149 # We need to modify old_lvs so that removal later removes the
11150 # right LVs, not the newly added ones; note that old_lvs is a
11152 for disk in old_lvs:
11153 disk.logical_id = ren_fn(disk, temp_suffix)
11154 self.cfg.SetDiskID(disk, self.target_node)
11156 # Now that the new lvs have the old name, we can add them to the device
11157 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11158 result = self.rpc.call_blockdev_addchildren(self.target_node,
11159 (dev, self.instance), new_lvs)
11160 msg = result.fail_msg
11162 for new_lv in new_lvs:
11163 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11166 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11167 hint=("cleanup manually the unused logical"
11169 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11171 cstep = itertools.count(5)
11173 if self.early_release:
11174 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11175 self._RemoveOldStorage(self.target_node, iv_names)
11176 # TODO: Check if releasing locks early still makes sense
11177 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11179 # Release all resource locks except those used by the instance
11180 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11181 keep=self.node_secondary_ip.keys())
11183 # Release all node locks while waiting for sync
11184 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11186 # TODO: Can the instance lock be downgraded here? Take the optional disk
11187 # shutdown in the caller into consideration.
11190 # This can fail as the old devices are degraded and _WaitForSync
11191 # does a combined result over all disks, so we don't check its return value
11192 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11193 _WaitForSync(self.lu, self.instance)
11195 # Check all devices manually
11196 self._CheckDevices(self.instance.primary_node, iv_names)
11198 # Step: remove old storage
11199 if not self.early_release:
11200 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11201 self._RemoveOldStorage(self.target_node, iv_names)
11203 def _ExecDrbd8Secondary(self, feedback_fn):
11204 """Replace the secondary node for DRBD 8.
11206 The algorithm for replace is quite complicated:
11207 - for all disks of the instance:
11208 - create new LVs on the new node with same names
11209 - shutdown the drbd device on the old secondary
11210 - disconnect the drbd network on the primary
11211 - create the drbd device on the new secondary
11212 - network attach the drbd on the primary, using an artifice:
11213 the drbd code for Attach() will connect to the network if it
11214 finds a device which is connected to the good local disks but
11215 not network enabled
11216 - wait for sync across all devices
11217 - remove all disks from the old secondary
11219 Failures are not very well handled.
11224 pnode = self.instance.primary_node
11226 # Step: check device activation
11227 self.lu.LogStep(1, steps_total, "Check device existence")
11228 self._CheckDisksExistence([self.instance.primary_node])
11229 self._CheckVolumeGroup([self.instance.primary_node])
11231 # Step: check other node consistency
11232 self.lu.LogStep(2, steps_total, "Check peer consistency")
11233 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11235 # Step: create new storage
11236 self.lu.LogStep(3, steps_total, "Allocate new storage")
11237 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11238 for idx, dev in enumerate(disks):
11239 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11240 (self.new_node, idx))
11241 # we pass force_create=True to force LVM creation
11242 for new_lv in dev.children:
11243 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11244 True, _GetInstanceInfoText(self.instance), False)
11246 # Step 4: drbd minors and drbd setup changes
11247 # after this, we must manually remove the drbd minors on both the
11248 # error and the success paths
11249 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11250 minors = self.cfg.AllocateDRBDMinor([self.new_node
11251 for dev in self.instance.disks],
11252 self.instance.name)
11253 logging.debug("Allocated minors %r", minors)
11256 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11257 self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
11258 (self.new_node, idx))
11259 # create new devices on new_node; note that we create two IDs:
11260 # one without port, so the drbd will be activated without
11261 # networking information on the new node at this stage, and one
11262 # with network, for the latter activation in step 4
11263 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11264 if self.instance.primary_node == o_node1:
11267 assert self.instance.primary_node == o_node2, "Three-node instance?"
11270 new_alone_id = (self.instance.primary_node, self.new_node, None,
11271 p_minor, new_minor, o_secret)
11272 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11273 p_minor, new_minor, o_secret)
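      # For illustration (hypothetical values): a disk with logical_id
      # ("node1", "node2", 11000, 0, 3, "secret") whose secondary "node2" is
      # replaced by "node3" with new minor 5 gets
      #   new_alone_id = ("node1", "node3", None, 0, 5, "secret")
      #   new_net_id   = ("node1", "node3", 11000, 0, 5, "secret")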
11275 iv_names[idx] = (dev, dev.children, new_net_id)
11276 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11278 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11279 logical_id=new_alone_id,
11280 children=dev.children,
11283 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11286 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11288 _GetInstanceInfoText(self.instance), False)
11289 except errors.GenericError:
11290 self.cfg.ReleaseDRBDMinors(self.instance.name)
11293 # We have new devices, shutdown the drbd on the old secondary
11294 for idx, dev in enumerate(self.instance.disks):
11295 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11296 self.cfg.SetDiskID(dev, self.target_node)
11297 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11298 (dev, self.instance)).fail_msg
11300 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11301 "node: %s" % (idx, msg),
11302 hint=("Please cleanup this device manually as"
11303 " soon as possible"))
11305 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11306 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11307 self.instance.disks)[pnode]
11309 msg = result.fail_msg
11311 # detaches didn't succeed (unlikely)
11312 self.cfg.ReleaseDRBDMinors(self.instance.name)
11313 raise errors.OpExecError("Can't detach the disks from the network on"
11314 " old node: %s" % (msg,))
11316 # if we managed to detach at least one, we update all the disks of
11317 # the instance to point to the new secondary
11318 self.lu.LogInfo("Updating instance configuration")
11319 for dev, _, new_logical_id in iv_names.itervalues():
11320 dev.logical_id = new_logical_id
11321 self.cfg.SetDiskID(dev, self.instance.primary_node)
11323 self.cfg.Update(self.instance, feedback_fn)
11325 # Release all node locks (the configuration has been updated)
11326 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11328 # and now perform the drbd attach
11329 self.lu.LogInfo("Attaching primary drbds to new secondary"
11330 " (standalone => connected)")
11331 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11333 self.node_secondary_ip,
11334 (self.instance.disks, self.instance),
11335 self.instance.name,
11337 for to_node, to_result in result.items():
11338 msg = to_result.fail_msg
11340 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11342 hint=("please do a gnt-instance info to see the"
11343 " status of disks"))
11345 cstep = itertools.count(5)
11347 if self.early_release:
11348 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11349 self._RemoveOldStorage(self.target_node, iv_names)
11350 # TODO: Check if releasing locks early still makes sense
11351 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11353 # Release all resource locks except those used by the instance
11354 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11355 keep=self.node_secondary_ip.keys())
11357 # TODO: Can the instance lock be downgraded here? Take the optional disk
11358 # shutdown in the caller into consideration.
11361 # This can fail as the old devices are degraded and _WaitForSync
11362 # does a combined result over all disks, so we don't check its return value
11363 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11364 _WaitForSync(self.lu, self.instance)
11366 # Check all devices manually
11367 self._CheckDevices(self.instance.primary_node, iv_names)
11369 # Step: remove old storage
11370 if not self.early_release:
11371 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11372 self._RemoveOldStorage(self.target_node, iv_names)
11375 class LURepairNodeStorage(NoHooksLU):
11376 """Repairs the volume group on a node.
11381 def CheckArguments(self):
11382 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11384 storage_type = self.op.storage_type
11386 if (constants.SO_FIX_CONSISTENCY not in
11387 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11388 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11389 " repaired" % storage_type,
11390 errors.ECODE_INVAL)
11392 def ExpandNames(self):
11393 self.needed_locks = {
11394 locking.LEVEL_NODE: [self.op.node_name],
11397 def _CheckFaultyDisks(self, instance, node_name):
11398 """Ensure faulty disks abort the opcode or at least warn."""
11400 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11402 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11403 " node '%s'" % (instance.name, node_name),
11404 errors.ECODE_STATE)
11405 except errors.OpPrereqError, err:
11406 if self.op.ignore_consistency:
11407 self.proc.LogWarning(str(err.args[0]))
11411 def CheckPrereq(self):
11412 """Check prerequisites.
11415 # Check whether any instance on this node has faulty disks
11416 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11417 if inst.admin_state != constants.ADMINST_UP:
11419 check_nodes = set(inst.all_nodes)
11420 check_nodes.discard(self.op.node_name)
11421 for inst_node_name in check_nodes:
11422 self._CheckFaultyDisks(inst, inst_node_name)
11424 def Exec(self, feedback_fn):
11425 feedback_fn("Repairing storage unit '%s' on %s ..." %
11426 (self.op.name, self.op.node_name))
11428 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11429 result = self.rpc.call_storage_execute(self.op.node_name,
11430 self.op.storage_type, st_args,
11432 constants.SO_FIX_CONSISTENCY)
11433 result.Raise("Failed to repair storage unit '%s' on %s" %
11434 (self.op.name, self.op.node_name))
11437 class LUNodeEvacuate(NoHooksLU):
11438 """Evacuates instances off a list of nodes.
11443 _MODE2IALLOCATOR = {
11444 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11445 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11446 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11448 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11449 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11450 constants.IALLOCATOR_NEVAC_MODES)
11452 def CheckArguments(self):
11453 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11455 def ExpandNames(self):
11456 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11458 if self.op.remote_node is not None:
11459 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11460 assert self.op.remote_node
11462 if self.op.remote_node == self.op.node_name:
11463 raise errors.OpPrereqError("Can not use evacuated node as a new"
11464 " secondary node", errors.ECODE_INVAL)
11466 if self.op.mode != constants.NODE_EVAC_SEC:
11467 raise errors.OpPrereqError("Without the use of an iallocator only"
11468 " secondary instances can be evacuated",
11469 errors.ECODE_INVAL)
11472 self.share_locks = _ShareAll()
11473 self.needed_locks = {
11474 locking.LEVEL_INSTANCE: [],
11475 locking.LEVEL_NODEGROUP: [],
11476 locking.LEVEL_NODE: [],
11479 # Determine nodes (via group) optimistically, needs verification once locks
11480 # have been acquired
11481 self.lock_nodes = self._DetermineNodes()
11483 def _DetermineNodes(self):
11484 """Gets the list of nodes to operate on.
11487 if self.op.remote_node is None:
11488 # Iallocator will choose any node(s) in the same group
11489 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11491 group_nodes = frozenset([self.op.remote_node])
11493 # Determine nodes to be locked
11494 return set([self.op.node_name]) | group_nodes
11496 def _DetermineInstances(self):
11497 """Builds list of instances to operate on.
11500 assert self.op.mode in constants.NODE_EVAC_MODES
11502 if self.op.mode == constants.NODE_EVAC_PRI:
11503 # Primary instances only
11504 inst_fn = _GetNodePrimaryInstances
11505 assert self.op.remote_node is None, \
11506 "Evacuating primary instances requires iallocator"
11507 elif self.op.mode == constants.NODE_EVAC_SEC:
11508 # Secondary instances only
11509 inst_fn = _GetNodeSecondaryInstances
11512 assert self.op.mode == constants.NODE_EVAC_ALL
11513 inst_fn = _GetNodeInstances
11514 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11516 raise errors.OpPrereqError("Due to an issue with the iallocator"
11517 " interface it is not possible to evacuate"
11518 " all instances at once; specify explicitly"
11519 " whether to evacuate primary or secondary"
11521 errors.ECODE_INVAL)
11523 return inst_fn(self.cfg, self.op.node_name)
11525 def DeclareLocks(self, level):
11526 if level == locking.LEVEL_INSTANCE:
11527 # Lock instances optimistically, needs verification once node and group
11528 # locks have been acquired
11529 self.needed_locks[locking.LEVEL_INSTANCE] = \
11530 set(i.name for i in self._DetermineInstances())
11532 elif level == locking.LEVEL_NODEGROUP:
11533 # Lock node groups for all potential target nodes optimistically, needs
11534 # verification once nodes have been acquired
11535 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11536 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11538 elif level == locking.LEVEL_NODE:
11539 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11541 def CheckPrereq(self):
11543 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11544 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11545 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11547 need_nodes = self._DetermineNodes()
11549 if not owned_nodes.issuperset(need_nodes):
11550 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11551 " locks were acquired, current nodes are"
11552 " are '%s', used to be '%s'; retry the"
11554 (self.op.node_name,
11555 utils.CommaJoin(need_nodes),
11556 utils.CommaJoin(owned_nodes)),
11557 errors.ECODE_STATE)
11559 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11560 if owned_groups != wanted_groups:
11561 raise errors.OpExecError("Node groups changed since locks were acquired,"
11562 " current groups are '%s', used to be '%s';"
11563 " retry the operation" %
11564 (utils.CommaJoin(wanted_groups),
11565 utils.CommaJoin(owned_groups)))
11567 # Determine affected instances
11568 self.instances = self._DetermineInstances()
11569 self.instance_names = [i.name for i in self.instances]
11571 if set(self.instance_names) != owned_instances:
11572 raise errors.OpExecError("Instances on node '%s' changed since locks"
11573 " were acquired, current instances are '%s',"
11574 " used to be '%s'; retry the operation" %
11575 (self.op.node_name,
11576 utils.CommaJoin(self.instance_names),
11577 utils.CommaJoin(owned_instances)))
11579 if self.instance_names:
11580 self.LogInfo("Evacuating instances from node '%s': %s",
11582 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11584 self.LogInfo("No instances to evacuate from node '%s'",
11587 if self.op.remote_node is not None:
11588 for i in self.instances:
11589 if i.primary_node == self.op.remote_node:
11590 raise errors.OpPrereqError("Node %s is the primary node of"
11591 " instance %s, cannot use it as"
11593 (self.op.remote_node, i.name),
11594 errors.ECODE_INVAL)
11596 def Exec(self, feedback_fn):
11597 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11599 if not self.instance_names:
11600 # No instances to evacuate
11603 elif self.op.iallocator is not None:
11604 # TODO: Implement relocation to other group
11605 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11606 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11607 instances=list(self.instance_names))
11609 ial.Run(self.op.iallocator)
11611 if not ial.success:
11612 raise errors.OpPrereqError("Can't compute node evacuation using"
11613 " iallocator '%s': %s" %
11614 (self.op.iallocator, ial.info),
11615 errors.ECODE_NORES)
11617 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11619 elif self.op.remote_node is not None:
11620 assert self.op.mode == constants.NODE_EVAC_SEC
11622 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11623 remote_node=self.op.remote_node,
11625 mode=constants.REPLACE_DISK_CHG,
11626 early_release=self.op.early_release)]
11627 for instance_name in self.instance_names
11631 raise errors.ProgrammerError("No iallocator or remote node")
11633 return ResultWithJobs(jobs)
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op

def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group
11658 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11659 """Unpacks the result of change-group and node-evacuate iallocator requests.
11661 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11662 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11664 @type lu: L{LogicalUnit}
11665 @param lu: Logical unit instance
11666 @type alloc_result: tuple/list
11667 @param alloc_result: Result from iallocator
11668 @type early_release: bool
11669 @param early_release: Whether to release locks early if possible
11670 @type use_nodes: bool
11671 @param use_nodes: Whether to display node names instead of groups
11674 (moved, failed, jobs) = alloc_result
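  # For illustration, an alloc_result could look like (hypothetical values):
  #   moved  = [("inst1.example.com", "group1", ["node3.example.com"])]
  #   failed = [("inst2.example.com", "not enough memory")]
  #   jobs   = [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}], ...]
  # Each inner list of serialized opcodes becomes one submitted job.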
11677 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11678 for (name, reason) in failed)
11679 lu.LogWarning("Unable to evacuate instances %s", failreason)
11680 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11683 lu.LogInfo("Instances to be moved: %s",
11684 utils.CommaJoin("%s (to %s)" %
11685 (name, _NodeEvacDest(use_nodes, group, nodes))
11686 for (name, group, nodes) in moved))
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
11693 class LUInstanceGrowDisk(LogicalUnit):
11694 """Grow a disk of an instance.
11697 HPATH = "disk-grow"
11698 HTYPE = constants.HTYPE_INSTANCE
11701 def ExpandNames(self):
11702 self._ExpandAndLockInstance()
11703 self.needed_locks[locking.LEVEL_NODE] = []
11704 self.needed_locks[locking.LEVEL_NODE_RES] = []
11705 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11706 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11708 def DeclareLocks(self, level):
11709 if level == locking.LEVEL_NODE:
11710 self._LockInstancesNodes()
11711 elif level == locking.LEVEL_NODE_RES:
11713 self.needed_locks[locking.LEVEL_NODE_RES] = \
11714 self.needed_locks[locking.LEVEL_NODE][:]
11716 def BuildHooksEnv(self):
11717 """Build hooks env.
11719 This runs on the master, the primary and all the secondaries.
11723 "DISK": self.op.disk,
11724 "AMOUNT": self.op.amount,
11725 "ABSOLUTE": self.op.absolute,
11727 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11730 def BuildHooksNodes(self):
11731 """Build hooks nodes.
11734 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11737 def CheckPrereq(self):
11738 """Check prerequisites.
11740 This checks that the instance is in the cluster.
11743 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11744 assert instance is not None, \
11745 "Cannot retrieve locked instance %s" % self.op.instance_name
11746 nodenames = list(instance.all_nodes)
11747 for node in nodenames:
11748 _CheckNodeOnline(self, node)
11750 self.instance = instance
11752 if instance.disk_template not in constants.DTS_GROWABLE:
11753 raise errors.OpPrereqError("Instance's disk layout does not support"
11754 " growing", errors.ECODE_INVAL)
11756 self.disk = instance.FindDisk(self.op.disk)
11758 if self.op.absolute:
11759 self.target = self.op.amount
11760 self.delta = self.target - self.disk.size
11762 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11763 "current disk size (%s)" %
11764 (utils.FormatUnit(self.target, "h"),
11765 utils.FormatUnit(self.disk.size, "h")),
11766 errors.ECODE_STATE)
11768 self.delta = self.op.amount
11769 self.target = self.disk.size + self.delta
11771 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11772 utils.FormatUnit(self.delta, "h"),
11773 errors.ECODE_INVAL)
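    # Worked example (hypothetical sizes, in MB): growing a 10240 MB disk
    # with amount=2048 gives delta=2048 and target=12288; with absolute=True
    # and amount=20480 the delta is 10240 for a target of 20480.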
11775 if instance.disk_template not in (constants.DT_FILE,
11776 constants.DT_SHARED_FILE,
11778 # TODO: check the free disk space for file, when that feature will be
11780 _CheckNodesFreeDiskPerVG(self, nodenames,
11781 self.disk.ComputeGrowth(self.delta))
11783 def Exec(self, feedback_fn):
11784 """Execute disk grow.
11787 instance = self.instance
11790 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11791 assert (self.owned_locks(locking.LEVEL_NODE) ==
11792 self.owned_locks(locking.LEVEL_NODE_RES))
11794 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11796 raise errors.OpExecError("Cannot activate block device to grow")
11798 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11799 (self.op.disk, instance.name,
11800 utils.FormatUnit(self.delta, "h"),
11801 utils.FormatUnit(self.target, "h")))
11803 # First run all grow ops in dry-run mode
11804 for node in instance.all_nodes:
11805 self.cfg.SetDiskID(disk, node)
11806 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11808 result.Raise("Grow request failed to node %s" % node)
11810 # We know that (as far as we can test) operations across different
11811 # nodes will succeed, time to run it for real on the backing storage
11812 for node in instance.all_nodes:
11813 self.cfg.SetDiskID(disk, node)
11814 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11816 result.Raise("Grow request failed to node %s" % node)
11818 # And now execute it for logical storage, on the primary node
11819 node = instance.primary_node
11820 self.cfg.SetDiskID(disk, node)
11821 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11823 result.Raise("Grow request failed to node %s" % node)
11825 disk.RecordGrow(self.delta)
11826 self.cfg.Update(instance, feedback_fn)
11828 # Changes have been recorded, release node lock
11829 _ReleaseLocks(self, locking.LEVEL_NODE)
11831 # Downgrade lock while waiting for sync
11832 self.glm.downgrade(locking.LEVEL_INSTANCE)
11834 if self.op.wait_for_sync:
11835 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11837 self.proc.LogWarning("Disk sync-ing has not returned a good"
11838 " status; please check the instance")
11839 if instance.admin_state != constants.ADMINST_UP:
11840 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11841 elif instance.admin_state != constants.ADMINST_UP:
11842 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11843 " not supposed to be running because no wait for"
11844 " sync mode was requested")
11846 assert self.owned_locks(locking.LEVEL_NODE_RES)
11847 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11850 class LUInstanceQueryData(NoHooksLU):
11851 """Query runtime instance data.
11856 def ExpandNames(self):
11857 self.needed_locks = {}
11859 # Use locking if requested or when non-static information is wanted
11860 if not (self.op.static or self.op.use_locking):
11861 self.LogWarning("Non-static data requested, locks need to be acquired")
11862 self.op.use_locking = True
11864 if self.op.instances or not self.op.use_locking:
11865 # Expand instance names right here
11866 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11868 # Will use acquired locks
11869 self.wanted_names = None
11871 if self.op.use_locking:
11872 self.share_locks = _ShareAll()
11874 if self.wanted_names is None:
11875 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11877 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11879 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11880 self.needed_locks[locking.LEVEL_NODE] = []
11881 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11883 def DeclareLocks(self, level):
11884 if self.op.use_locking:
11885 if level == locking.LEVEL_NODEGROUP:
11886 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11888 # Lock all groups used by instances optimistically; this requires going
11889 # via the node before it's locked, requiring verification later on
11890 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11891 frozenset(group_uuid
11892 for instance_name in owned_instances
11894 self.cfg.GetInstanceNodeGroups(instance_name))
11896 elif level == locking.LEVEL_NODE:
11897 self._LockInstancesNodes()
11899 def CheckPrereq(self):
11900 """Check prerequisites.
11902 This only checks the optional instance list against the existing names.
11905 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11906 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11907 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11909 if self.wanted_names is None:
11910 assert self.op.use_locking, "Locking was not used"
11911 self.wanted_names = owned_instances
11913 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11915 if self.op.use_locking:
11916 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11919 assert not (owned_instances or owned_groups or owned_nodes)
11921 self.wanted_instances = instances.values()
11923 def _ComputeBlockdevStatus(self, node, instance, dev):
11924 """Returns the status of a block device
11927 if self.op.static or not node:
11930 self.cfg.SetDiskID(dev, node)
11932 result = self.rpc.call_blockdev_find(node, dev)
11936 result.Raise("Can't compute disk status for %s" % instance.name)
11938 status = result.payload
11942 return (status.dev_path, status.major, status.minor,
11943 status.sync_percent, status.estimated_time,
11944 status.is_degraded, status.ldisk_status)
11946 def _ComputeDiskStatus(self, instance, snode, dev):
11947 """Compute block device status.
11950 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11952 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11954 def _ComputeDiskStatusInner(self, instance, snode, dev):
11955 """Compute block device status.
11957 @attention: The device has to be annotated already.
11960 if dev.dev_type in constants.LDS_DRBD:
11961 # we change the snode then (otherwise we use the one passed in)
11962 if dev.logical_id[0] == instance.primary_node:
11963 snode = dev.logical_id[1]
11965 snode = dev.logical_id[0]
11967 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11969 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11972 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11979 "iv_name": dev.iv_name,
11980 "dev_type": dev.dev_type,
11981 "logical_id": dev.logical_id,
11982 "physical_id": dev.physical_id,
11983 "pstatus": dev_pstatus,
11984 "sstatus": dev_sstatus,
11985 "children": dev_children,
11990 def Exec(self, feedback_fn):
11991 """Gather and return data"""
11994 cluster = self.cfg.GetClusterInfo()
11996 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11997 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11999 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12000 for node in nodes.values()))
12002 group2name_fn = lambda uuid: groups[uuid].name
12004 for instance in self.wanted_instances:
12005 pnode = nodes[instance.primary_node]
12007 if self.op.static or pnode.offline:
12008 remote_state = None
12010 self.LogWarning("Primary node %s is marked offline, returning static"
12011 " information only for instance %s" %
12012 (pnode.name, instance.name))
12014 remote_info = self.rpc.call_instance_info(instance.primary_node,
12016 instance.hypervisor)
12017 remote_info.Raise("Error checking node %s" % instance.primary_node)
12018 remote_info = remote_info.payload
12019 if remote_info and "state" in remote_info:
12020 remote_state = "up"
12022 if instance.admin_state == constants.ADMINST_UP:
12023 remote_state = "down"
12025 remote_state = instance.admin_state
12027 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12030 snodes_group_uuids = [nodes[snode_name].group
12031 for snode_name in instance.secondary_nodes]
12033 result[instance.name] = {
12034 "name": instance.name,
12035 "config_state": instance.admin_state,
12036 "run_state": remote_state,
12037 "pnode": instance.primary_node,
12038 "pnode_group_uuid": pnode.group,
12039 "pnode_group_name": group2name_fn(pnode.group),
12040 "snodes": instance.secondary_nodes,
12041 "snodes_group_uuids": snodes_group_uuids,
12042 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12044 # this happens to be the same format used for hooks
12045 "nics": _NICListToTuple(self, instance.nics),
12046 "disk_template": instance.disk_template,
12048 "hypervisor": instance.hypervisor,
12049 "network_port": instance.network_port,
12050 "hv_instance": instance.hvparams,
12051 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12052 "be_instance": instance.beparams,
12053 "be_actual": cluster.FillBE(instance),
12054 "os_instance": instance.osparams,
12055 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12056 "serial_no": instance.serial_no,
12057 "mtime": instance.mtime,
12058 "ctime": instance.ctime,
12059 "uuid": instance.uuid,
12065 def PrepareContainerMods(mods, private_fn):
12066 """Prepares a list of container modifications by adding a private data field.
12068 @type mods: list of tuples; (operation, index, parameters)
12069 @param mods: List of modifications
12070 @type private_fn: callable or None
12071 @param private_fn: Callable for constructing a private data field for a
12076 if private_fn is None:
12081 return [(op, idx, params, fn()) for (op, idx, params) in mods]
12084 #: Type description for changes as returned by L{ApplyContainerMods}'s
12086 _TApplyContModsCbChanges = \
12087 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12088 ht.TNonEmptyString,
12093 def ApplyContainerMods(kind, container, chgdesc, mods,
12094 create_fn, modify_fn, remove_fn):
12095 """Applies descriptions in C{mods} to C{container}.
12098 @param kind: One-word item description
12099 @type container: list
12100 @param container: Container to modify
12101 @type chgdesc: None or list
12102 @param chgdesc: List of applied changes
12104 @param mods: Modifications as returned by L{PrepareContainerMods}
12105 @type create_fn: callable
12106 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12107 receives absolute item index, parameters and private data object as added
12108 by L{PrepareContainerMods}, returns tuple containing new item and changes
12110 @type modify_fn: callable
12111 @param modify_fn: Callback for modifying an existing item
12112 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12113 and private data object as added by L{PrepareContainerMods}, returns
12115 @type remove_fn: callable
12116 @param remove_fn: Callback on removing item; receives absolute item index,
12117 item and private data object as added by L{PrepareContainerMods}
12120 for (op, idx, params, private) in mods:
12123 absidx = len(container) - 1
12125 raise IndexError("Not accepting negative indices other than -1")
12126 elif idx > len(container):
12127 raise IndexError("Got %s index %s, but there are only %s" %
12128 (kind, idx, len(container)))
12134 if op == constants.DDM_ADD:
12135 # Calculate where item will be added
12137 addidx = len(container)
12141 if create_fn is None:
12144 (item, changes) = create_fn(addidx, params, private)
12147 container.append(item)
12150 assert idx <= len(container)
12151 # list.insert does so before the specified index
12152 container.insert(idx, item)
12154 # Retrieve existing item
12156 item = container[absidx]
12158 raise IndexError("Invalid %s index %s" % (kind, idx))
12160 if op == constants.DDM_REMOVE:
12163 if remove_fn is not None:
12164 remove_fn(absidx, item, private)
12166 changes = [("%s/%s" % (kind, absidx), "remove")]
12168 assert container[absidx] == item
12169 del container[absidx]
12170 elif op == constants.DDM_MODIFY:
12171 if modify_fn is not None:
12172 changes = modify_fn(absidx, item, params, private)
12174 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12176 assert _TApplyContModsCbChanges(changes)
12178 if not (chgdesc is None or changes is None):
12179 chgdesc.extend(changes)
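
# The following is a minimal usage sketch of PrepareContainerMods and
# ApplyContainerMods (illustrative only, not called anywhere in this module;
# all names and values below are hypothetical).
def _ExampleContainerMods():
  container = ["first", "second"]
  chgdesc = []
  mods = PrepareContainerMods([
    (constants.DDM_ADD, -1, {"value": "third"}),
    (constants.DDM_REMOVE, 0, None),
    ], None)

  def _Create(addidx, params, _):
    # Must return the new item and a list of (name, value) change pairs
    return (params["value"], [("example/%s" % addidx, "add")])

  ApplyContainerMods("example", container, chgdesc, mods,
                     _Create, None, None)

  # container is now ["second", "third"]; chgdesc records both changes
  return (container, chgdesc)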
12182 def _UpdateIvNames(base_index, disks):
12183 """Updates the C{iv_name} attribute of disks.
12185 @type disks: list of L{objects.Disk}
12188 for (idx, disk) in enumerate(disks):
12189 disk.iv_name = "disk/%s" % (base_index + idx, )
12192 class _InstNicModPrivate:
12193 """Data structure for network interface modifications.
12195 Used by L{LUInstanceSetParams}.
12198 def __init__(self):
12203 class LUInstanceSetParams(LogicalUnit):
12204 """Modifies an instances's parameters.
12207 HPATH = "instance-modify"
12208 HTYPE = constants.HTYPE_INSTANCE
12212 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12213 assert ht.TList(mods)
12214 assert not mods or len(mods[0]) in (2, 3)
12216 if mods and len(mods[0]) == 2:
12220 for op, params in mods:
12221 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12222 result.append((op, -1, params))
12226 raise errors.OpPrereqError("Only one %s add or remove operation is"
12227 " supported at a time" % kind,
12228 errors.ECODE_INVAL)
12230 result.append((constants.DDM_MODIFY, op, params))
12232 assert verify_fn(result)
12239 def _CheckMods(kind, mods, key_types, item_fn):
12240 """Ensures requested disk/NIC modifications are valid.
12243 for (op, _, params) in mods:
12244 assert ht.TDict(params)
12246 utils.ForceDictType(params, key_types)
12248 if op == constants.DDM_REMOVE:
12250 raise errors.OpPrereqError("No settings should be passed when"
12251 " removing a %s" % kind,
12252 errors.ECODE_INVAL)
12253 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12254 item_fn(op, params)
12256 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12259 def _VerifyDiskModification(op, params):
12260 """Verifies a disk modification.
12263 if op == constants.DDM_ADD:
12264 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12265 if mode not in constants.DISK_ACCESS_SET:
12266 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12267 errors.ECODE_INVAL)
12269 size = params.get(constants.IDISK_SIZE, None)
12271 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12272 constants.IDISK_SIZE, errors.ECODE_INVAL)
12276 except (TypeError, ValueError), err:
12277 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12278 errors.ECODE_INVAL)
12280 params[constants.IDISK_SIZE] = size
12282 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12283 raise errors.OpPrereqError("Disk size change not possible, use"
12284 " grow-disk", errors.ECODE_INVAL)
12287 def _VerifyNicModification(op, params):
12288 """Verifies a network interface modification.
12291 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12292 ip = params.get(constants.INIC_IP, None)
12295 elif ip.lower() == constants.VALUE_NONE:
12296 params[constants.INIC_IP] = None
12297 elif not netutils.IPAddress.IsValid(ip):
12298 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12299 errors.ECODE_INVAL)
12301 bridge = params.get("bridge", None)
12302 link = params.get(constants.INIC_LINK, None)
12303 if bridge and link:
12304 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12305 " at the same time", errors.ECODE_INVAL)
12306 elif bridge and bridge.lower() == constants.VALUE_NONE:
12307 params["bridge"] = None
12308 elif link and link.lower() == constants.VALUE_NONE:
12309 params[constants.INIC_LINK] = None
12311 if op == constants.DDM_ADD:
12312 macaddr = params.get(constants.INIC_MAC, None)
12313 if macaddr is None:
12314 params[constants.INIC_MAC] = constants.VALUE_AUTO
12316 if constants.INIC_MAC in params:
12317 macaddr = params[constants.INIC_MAC]
12318 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12319 macaddr = utils.NormalizeAndValidateMac(macaddr)
12321 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12322 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12323 " modifying an existing NIC",
12324 errors.ECODE_INVAL)
12326 def CheckArguments(self):
12327 if not (self.op.nics or self.op.disks or self.op.disk_template or
12328 self.op.hvparams or self.op.beparams or self.op.os_name or
12329 self.op.offline is not None or self.op.runtime_mem):
12330 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12332 if self.op.hvparams:
12333 _CheckGlobalHvParams(self.op.hvparams)
12336 self._UpgradeDiskNicMods("disk", self.op.disks,
12337 opcodes.OpInstanceSetParams.TestDiskModifications)
12339 self._UpgradeDiskNicMods("NIC", self.op.nics,
12340 opcodes.OpInstanceSetParams.TestNicModifications)
12342 # Check disk modifications
12343 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12344 self._VerifyDiskModification)
12346 if self.op.disks and self.op.disk_template is not None:
12347 raise errors.OpPrereqError("Disk template conversion and other disk"
12348 " changes not supported at the same time",
12349 errors.ECODE_INVAL)
12351 if (self.op.disk_template and
12352 self.op.disk_template in constants.DTS_INT_MIRROR and
12353 self.op.remote_node is None):
12354 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12355 " one requires specifying a secondary node",
12356 errors.ECODE_INVAL)
12358 # Check NIC modifications
12359 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12360 self._VerifyNicModification)
12362 def ExpandNames(self):
12363 self._ExpandAndLockInstance()
12364 # Can't even acquire node locks in shared mode as upcoming changes in
12365 # Ganeti 2.6 will start to modify the node object on disk conversion
12366 self.needed_locks[locking.LEVEL_NODE] = []
12367 self.needed_locks[locking.LEVEL_NODE_RES] = []
12368 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12370 def DeclareLocks(self, level):
12371 # TODO: Acquire group lock in shared mode (disk parameters)
12372 if level == locking.LEVEL_NODE:
12373 self._LockInstancesNodes()
12374 if self.op.disk_template and self.op.remote_node:
12375 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12376 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12377 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12379 self.needed_locks[locking.LEVEL_NODE_RES] = \
12380 self.needed_locks[locking.LEVEL_NODE][:]
12382 def BuildHooksEnv(self):
12383 """Build hooks env.
12385 This runs on the master, primary and secondaries.
12389 if constants.BE_MINMEM in self.be_new:
12390 args["minmem"] = self.be_new[constants.BE_MINMEM]
12391 if constants.BE_MAXMEM in self.be_new:
12392 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12393 if constants.BE_VCPUS in self.be_new:
12394 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12395 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12396 # information at all.
12398 if self._new_nics is not None:
12401 for nic in self._new_nics:
12402 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12403 mode = nicparams[constants.NIC_MODE]
12404 link = nicparams[constants.NIC_LINK]
12405 nics.append((nic.ip, nic.mac, mode, link))
12407 args["nics"] = nics
12409 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12410 if self.op.disk_template:
12411 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12412 if self.op.runtime_mem:
12413 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12417 def BuildHooksNodes(self):
12418 """Build hooks nodes.
12421 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12424 def _PrepareNicModification(self, params, private, old_ip, old_params,
12426 update_params_dict = dict([(key, params[key])
12427 for key in constants.NICS_PARAMETERS
12430 if "bridge" in params:
12431 update_params_dict[constants.NIC_LINK] = params["bridge"]
12433 new_params = _GetUpdatedParams(old_params, update_params_dict)
12434 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12436 new_filled_params = cluster.SimpleFillNIC(new_params)
12437 objects.NIC.CheckParameterSyntax(new_filled_params)
12439 new_mode = new_filled_params[constants.NIC_MODE]
12440 if new_mode == constants.NIC_MODE_BRIDGED:
12441 bridge = new_filled_params[constants.NIC_LINK]
12442 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12444 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12446 self.warn.append(msg)
12448 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12450 elif new_mode == constants.NIC_MODE_ROUTED:
12451 ip = params.get(constants.INIC_IP, old_ip)
12453 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12454 " on a routed NIC", errors.ECODE_INVAL)
12456 if constants.INIC_MAC in params:
12457 mac = params[constants.INIC_MAC]
12459 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12460 errors.ECODE_INVAL)
12461 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12462 # otherwise generate the MAC address
12463 params[constants.INIC_MAC] = \
12464 self.cfg.GenerateMAC(self.proc.GetECId())
12466 # or validate/reserve the current one
12468 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12469 except errors.ReservationError:
12470 raise errors.OpPrereqError("MAC address '%s' already in use"
12471 " in cluster" % mac,
12472 errors.ECODE_NOTUNIQUE)
12474 private.params = new_params
12475 private.filled = new_filled_params
12477 def CheckPrereq(self):
12478 """Check prerequisites.
12480 This only checks the instance list against the existing names.
12483 # checking the new params on the primary/secondary nodes
12485 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12486 cluster = self.cluster = self.cfg.GetClusterInfo()
12487 assert self.instance is not None, \
12488 "Cannot retrieve locked instance %s" % self.op.instance_name
12489 pnode = instance.primary_node
12490 nodelist = list(instance.all_nodes)
12491 pnode_info = self.cfg.GetNodeInfo(pnode)
12492 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12494 # Prepare disk/NIC modifications
12495 self.diskmod = PrepareContainerMods(self.op.disks, None)
12496 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12499 if self.op.os_name and not self.op.force:
12500 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12501 self.op.force_variant)
12502 instance_os = self.op.os_name
12504 instance_os = instance.os
12506 assert not (self.op.disk_template and self.op.disks), \
12507 "Can't modify disk template and apply disk changes at the same time"
12509 if self.op.disk_template:
12510 if instance.disk_template == self.op.disk_template:
12511 raise errors.OpPrereqError("Instance already has disk template %s" %
12512 instance.disk_template, errors.ECODE_INVAL)
12514 if (instance.disk_template,
12515 self.op.disk_template) not in self._DISK_CONVERSIONS:
12516 raise errors.OpPrereqError("Unsupported disk template conversion from"
12517 " %s to %s" % (instance.disk_template,
12518 self.op.disk_template),
12519 errors.ECODE_INVAL)
12520 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12521 msg="cannot change disk template")
12522 if self.op.disk_template in constants.DTS_INT_MIRROR:
12523 if self.op.remote_node == pnode:
12524 raise errors.OpPrereqError("Given new secondary node %s is the same"
12525 " as the primary node of the instance" %
12526 self.op.remote_node, errors.ECODE_STATE)
12527 _CheckNodeOnline(self, self.op.remote_node)
12528 _CheckNodeNotDrained(self, self.op.remote_node)
12529 # FIXME: here we assume that the old instance type is DT_PLAIN
12530 assert instance.disk_template == constants.DT_PLAIN
12531 disks = [{constants.IDISK_SIZE: d.size,
12532 constants.IDISK_VG: d.logical_id[0]}
12533 for d in instance.disks]
12534 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12535 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12537 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12538 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12539 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12540 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12541 ignore=self.op.ignore_ipolicy)
12542 if pnode_info.group != snode_info.group:
12543 self.LogWarning("The primary and secondary nodes are in two"
12544 " different node groups; the disk parameters"
12545 " from the first disk's node group will be"
12548 # hvparams processing
12549 if self.op.hvparams:
12550 hv_type = instance.hypervisor
12551 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12552 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12553 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12556 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12557 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12558 self.hv_proposed = self.hv_new = hv_new # the new actual values
12559 self.hv_inst = i_hvdict # the new dict (without defaults)
12561 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12563 self.hv_new = self.hv_inst = {}
12565 # beparams processing
12566 if self.op.beparams:
12567 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12569 objects.UpgradeBeParams(i_bedict)
12570 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12571 be_new = cluster.SimpleFillBE(i_bedict)
12572 self.be_proposed = self.be_new = be_new # the new actual values
12573 self.be_inst = i_bedict # the new dict (without defaults)
12575 self.be_new = self.be_inst = {}
12576 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12577 be_old = cluster.FillBE(instance)
12579 # CPU param validation -- checking every time a parameter is
12580 # changed to cover all cases where either CPU mask or vcpus have
12582 if (constants.BE_VCPUS in self.be_proposed and
12583 constants.HV_CPU_MASK in self.hv_proposed):
12585 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12586 # Verify mask is consistent with number of vCPUs. Can skip this
12587 # test if only 1 entry in the CPU mask, which means same mask
12588 # is applied to all vCPUs.
12589 if (len(cpu_list) > 1 and
12590 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12591 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12593 (self.be_proposed[constants.BE_VCPUS],
12594 self.hv_proposed[constants.HV_CPU_MASK]),
12595 errors.ECODE_INVAL)
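      # For illustration (hypothetical mask): "0-1:2-3" contains two per-vCPU
      # entries, so the check above accepts it for 2 vCPUs and rejects it for
      # 3; a single entry such as "0-3" applies to all vCPUs and never fails.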
12597 # Only perform this test if a new CPU mask is given
12598 if constants.HV_CPU_MASK in self.hv_new:
12599 # Calculate the largest CPU number requested
12600 max_requested_cpu = max(map(max, cpu_list))
12601 # Check that all of the instance's nodes have enough physical CPUs to
12602 # satisfy the requested CPU mask
12603 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12604 max_requested_cpu + 1, instance.hypervisor)
12606 # osparams processing
12607 if self.op.osparams:
12608 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12609 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12610 self.os_inst = i_osdict # the new dict (without defaults)
12616 #TODO(dynmem): do the appropriate check involving MINMEM
12617 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12618 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12619 mem_check_list = [pnode]
12620 if be_new[constants.BE_AUTO_BALANCE]:
12621 # either we changed auto_balance to yes or it was from before
12622 mem_check_list.extend(instance.secondary_nodes)
12623 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12624 instance.hypervisor)
12625 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12626 [instance.hypervisor])
12627 pninfo = nodeinfo[pnode]
12628 msg = pninfo.fail_msg
12630 # Assume the primary node is unreachable and go ahead
12631 self.warn.append("Can't get info from primary node %s: %s" %
12634 (_, _, (pnhvinfo, )) = pninfo.payload
12635 if not isinstance(pnhvinfo.get("memory_free", None), int):
12636 self.warn.append("Node data from primary node %s doesn't contain"
12637 " free memory information" % pnode)
12638 elif instance_info.fail_msg:
12639 self.warn.append("Can't get instance runtime information: %s" %
12640 instance_info.fail_msg)
12642 if instance_info.payload:
12643 current_mem = int(instance_info.payload["memory"])
12645 # Assume instance not running
12646 # (there is a slight race condition here, but it's not very
12647 # probable, and we have no other way to check)
12648 # TODO: Describe race condition
12650 #TODO(dynmem): do the appropriate check involving MINMEM
12651 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12652 pnhvinfo["memory_free"])
12654 raise errors.OpPrereqError("This change will prevent the instance"
12655 " from starting, due to %d MB of memory"
12656 " missing on its primary node" %
12658 errors.ECODE_NORES)
12660 if be_new[constants.BE_AUTO_BALANCE]:
12661 for node, nres in nodeinfo.items():
12662 if node not in instance.secondary_nodes:
12664 nres.Raise("Can't get info from secondary node %s" % node,
12665 prereq=True, ecode=errors.ECODE_STATE)
12666 (_, _, (nhvinfo, )) = nres.payload
12667 if not isinstance(nhvinfo.get("memory_free", None), int):
12668 raise errors.OpPrereqError("Secondary node %s didn't return free"
12669 " memory information" % node,
12670 errors.ECODE_STATE)
12671 #TODO(dynmem): do the appropriate check involving MINMEM
12672 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12673 raise errors.OpPrereqError("This change will prevent the instance"
12674 " from failover to its secondary node"
12675 " %s, due to not enough memory" % node,
12676 errors.ECODE_STATE)
12678 if self.op.runtime_mem:
12679 remote_info = self.rpc.call_instance_info(instance.primary_node,
12681 instance.hypervisor)
12682 remote_info.Raise("Error checking node %s" % instance.primary_node)
12683 if not remote_info.payload: # not running already
12684 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12685 errors.ECODE_STATE)
12687 current_memory = remote_info.payload["memory"]
12688 if (not self.op.force and
12689 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12690 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12691 raise errors.OpPrereqError("Instance %s must have between %d"
12692 " and %d MB of memory unless --force is"
12693 " given" % (instance.name,
12694 self.be_proposed[constants.BE_MINMEM],
12695 self.be_proposed[constants.BE_MAXMEM]),
12696 errors.ECODE_INVAL)
12698 if self.op.runtime_mem > current_memory:
12699 _CheckNodeFreeMemory(self, instance.primary_node,
12700 "ballooning memory for instance %s" %
12702 self.op.runtime_mem - current_memory,
12703 instance.hypervisor)
12705 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12706 raise errors.OpPrereqError("Disk operations not supported for"
12707 " diskless instances",
12708 errors.ECODE_INVAL)
12710 def _PrepareNicCreate(_, params, private):
12711 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12712 return (None, None)
12714 def _PrepareNicMod(_, nic, params, private):
12715 self._PrepareNicModification(params, private, nic.ip,
12716 nic.nicparams, cluster, pnode)
12719 # Verify NIC changes (operating on copy)
12720 nics = instance.nics[:]
12721 ApplyContainerMods("NIC", nics, None, self.nicmod,
12722 _PrepareNicCreate, _PrepareNicMod, None)
12723 if len(nics) > constants.MAX_NICS:
12724 raise errors.OpPrereqError("Instance has too many network interfaces"
12725 " (%d), cannot add more" % constants.MAX_NICS,
12726 errors.ECODE_STATE)
12728 # Verify disk changes (operating on a copy)
12729 disks = instance.disks[:]
12730 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12731 if len(disks) > constants.MAX_DISKS:
12732 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12733 " more" % constants.MAX_DISKS,
12734 errors.ECODE_STATE)
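# Note: the two ApplyContainerMods calls above only operate on throw-away
# copies; they exist to validate that, after applying the requested
# add/modify/remove operations, the NIC and disk containers stay within
# MAX_NICS/MAX_DISKS. Roughly (invented values), an entry in self.nicmod
# looks like an (operation, index, params, ...) tuple prepared from the
# opcode, e.g.
#   (constants.DDM_ADD, -1, {constants.INIC_MAC: "aa:00:00:12:34:56"}, ...)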
12736 if self.op.offline is not None:
12737 if self.op.offline:
12738 msg = "can't change to offline"
12740 msg = "can't change to online"
12741 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12743 # Pre-compute NIC changes (necessary to use result in hooks)
12744 self._nic_chgdesc = []
12746 # Operate on copies as this is still in prereq
12747 nics = [nic.Copy() for nic in instance.nics]
12748 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12749 self._CreateNewNic, self._ApplyNicMods, None)
12750 self._new_nics = nics
12752 self._new_nics = None
12754 def _ConvertPlainToDrbd(self, feedback_fn):
12755 """Converts an instance from plain to drbd.
12758 feedback_fn("Converting template to drbd")
12759 instance = self.instance
12760 pnode = instance.primary_node
12761 snode = self.op.remote_node
12763 assert instance.disk_template == constants.DT_PLAIN
12765 # create a fake disk info for _GenerateDiskTemplate
12766 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12767 constants.IDISK_VG: d.logical_id[0]}
12768 for d in instance.disks]
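# Illustrative shape of the fake disk_info built above (values invented):
#   [{constants.IDISK_SIZE: 10240,            # size in MB
#     constants.IDISK_MODE: constants.DISK_RDWR,
#     constants.IDISK_VG: "xenvg"}]           # VG taken from logical_id[0]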
12769 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12770 instance.name, pnode, [snode],
12771 disk_info, None, None, 0, feedback_fn,
12773 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12775 info = _GetInstanceInfoText(instance)
12776 feedback_fn("Creating additional volumes...")
12777 # first, create the missing data and meta devices
12778 for disk in anno_disks:
12779 # unfortunately this is... not too nice
12780 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12782 for child in disk.children:
12783 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12784 # at this stage, all new LVs have been created, we can rename the
12786 feedback_fn("Renaming original volumes...")
12787 rename_list = [(o, n.children[0].logical_id)
12788 for (o, n) in zip(instance.disks, new_disks)]
12789 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12790 result.Raise("Failed to rename original LVs")
12792 feedback_fn("Initializing DRBD devices...")
12793 # all child devices are in place, we can now create the DRBD devices
12794 for disk in anno_disks:
12795 for node in [pnode, snode]:
12796 f_create = node == pnode
12797 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12799 # at this point, the instance has been modified
12800 instance.disk_template = constants.DT_DRBD8
12801 instance.disks = new_disks
12802 self.cfg.Update(instance, feedback_fn)
12804 # Release node locks while waiting for sync
12805 _ReleaseLocks(self, locking.LEVEL_NODE)
12807 # disks are created, waiting for sync
12808 disk_abort = not _WaitForSync(self, instance,
12809 oneshot=not self.op.wait_for_sync)
12811 raise errors.OpExecError("There are some degraded disks for"
12812 " this instance, please cleanup manually")
12814 # Node resource locks will be released by caller
12816 def _ConvertDrbdToPlain(self, feedback_fn):
12817 """Converts an instance from drbd to plain.
12820 instance = self.instance
12822 assert len(instance.secondary_nodes) == 1
12823 assert instance.disk_template == constants.DT_DRBD8
12825 pnode = instance.primary_node
12826 snode = instance.secondary_nodes[0]
12827 feedback_fn("Converting template to plain")
12829 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12830 new_disks = [d.children[0] for d in instance.disks]
12832 # copy over size and mode
12833 for parent, child in zip(old_disks, new_disks):
12834 child.size = parent.size
12835 child.mode = parent.mode
12837 # this is a DRBD disk, return its port to the pool
12838 # NOTE: this must be done right before the call to cfg.Update!
12839 for disk in old_disks:
12840 tcp_port = disk.logical_id[2]
12841 self.cfg.AddTcpUdpPort(tcp_port)
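# For DRBD8 disks the logical_id is, to the best of our knowledge, a tuple
# of the form (node_a, node_b, tcp_port, minor_a, minor_b, shared_secret),
# which is why logical_id[2] above is the TCP port handed back to the pool.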
12843 # update instance structure
12844 instance.disks = new_disks
12845 instance.disk_template = constants.DT_PLAIN
12846 self.cfg.Update(instance, feedback_fn)
12848 # Release locks in case removing disks takes a while
12849 _ReleaseLocks(self, locking.LEVEL_NODE)
12851 feedback_fn("Removing volumes on the secondary node...")
12852 for disk in old_disks:
12853 self.cfg.SetDiskID(disk, snode)
12854 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12856 self.LogWarning("Could not remove block device %s on node %s,"
12857 " continuing anyway: %s", disk.iv_name, snode, msg)
12859 feedback_fn("Removing unneeded volumes on the primary node...")
12860 for idx, disk in enumerate(old_disks):
12861 meta = disk.children[1]
12862 self.cfg.SetDiskID(meta, pnode)
12863 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12865 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12866 " continuing anyway: %s", idx, pnode, msg)
12868 def _CreateNewDisk(self, idx, params, _):
12869 """Creates a new disk.
12872 instance = self.instance
12875 if instance.disk_template in constants.DTS_FILEBASED:
12876 (file_driver, file_path) = instance.disks[0].logical_id
12877 file_path = os.path.dirname(file_path)
12879 file_driver = file_path = None
12882 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12883 instance.primary_node, instance.secondary_nodes,
12884 [params], file_path, file_driver, idx,
12885 self.Log, self.diskparams)[0]
12887 info = _GetInstanceInfoText(instance)
12889 logging.info("Creating volume %s for instance %s",
12890 disk.iv_name, instance.name)
12891 # Note: this needs to be kept in sync with _CreateDisks
12893 for node in instance.all_nodes:
12894 f_create = (node == instance.primary_node)
12896 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12897 except errors.OpExecError, err:
12898 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12899 disk.iv_name, disk, node, err)
12902 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12906 def _ModifyDisk(idx, disk, params, _):
12907 """Modifies a disk.
12910 disk.mode = params[constants.IDISK_MODE]
12913 ("disk.mode/%d" % idx, disk.mode),
12916 def _RemoveDisk(self, idx, root, _):
12920 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12921 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12922 self.cfg.SetDiskID(disk, node)
12923 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12925 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12926 " continuing anyway", idx, node, msg)
12928 # if this is a DRBD disk, return its port to the pool
12929 if root.dev_type in constants.LDS_DRBD:
12930 self.cfg.AddTcpUdpPort(root.logical_id[2])
12933 def _CreateNewNic(idx, params, private):
12934 """Creates data structure for a new network interface.
12937 mac = params[constants.INIC_MAC]
12938 ip = params.get(constants.INIC_IP, None)
12939 nicparams = private.params
12941 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12943 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12944 (mac, ip, private.filled[constants.NIC_MODE],
12945 private.filled[constants.NIC_LINK])),
12949 def _ApplyNicMods(idx, nic, params, private):
12950 """Modifies a network interface.
12955 for key in [constants.INIC_MAC, constants.INIC_IP]:
12957 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12958 setattr(nic, key, params[key])
12961 nic.nicparams = private.params
12963 for (key, val) in params.items():
12964 changes.append(("nic.%s/%d" % (key, idx), val))
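# Both NIC callbacks above feed ApplyContainerMods with human-readable
# change descriptions; roughly (invented values):
#   _CreateNewNic -> (NIC object,
#                     [("nic.1", "add:mac=aa:00:00:12:34:56,ip=None,"
#                                "mode=bridged,link=xen-br0")])
#   _ApplyNicMods -> [("nic.mac/0", "aa:00:00:12:34:56"),
#                     ("nic.link/0", "xen-br1")]
# These descriptions later end up in the opcode result and in hooks.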
12968 def Exec(self, feedback_fn):
12969 """Modifies an instance.
12971 All parameters take effect only at the next restart of the instance.
12974 # Process the warnings from CheckPrereq here, as we don't have a
12975 # feedback_fn there.
12976 # TODO: Replace with self.LogWarning
12977 for warn in self.warn:
12978 feedback_fn("WARNING: %s" % warn)
12980 assert ((self.op.disk_template is None) ^
12981 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12982 "Not owning any node resource locks"
12985 instance = self.instance
12988 if self.op.runtime_mem:
12989 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12991 self.op.runtime_mem)
12992 rpcres.Raise("Cannot modify instance runtime memory")
12993 result.append(("runtime_memory", self.op.runtime_mem))
12995 # Apply disk changes
12996 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12997 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12998 _UpdateIvNames(0, instance.disks)
13000 if self.op.disk_template:
13002 check_nodes = set(instance.all_nodes)
13003 if self.op.remote_node:
13004 check_nodes.add(self.op.remote_node)
13005 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13006 owned = self.owned_locks(level)
13007 assert not (check_nodes - owned), \
13008 ("Not owning the correct locks, owning %r, expected at least %r" %
13009 (owned, check_nodes))
13011 r_shut = _ShutdownInstanceDisks(self, instance)
13013 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13014 " proceed with disk template conversion")
13015 mode = (instance.disk_template, self.op.disk_template)
13017 self._DISK_CONVERSIONS[mode](self, feedback_fn)
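# The handler is looked up in the _DISK_CONVERSIONS dispatch table defined
# at the end of this class, keyed by (old_template, new_template), e.g.
# (constants.DT_PLAIN, constants.DT_DRBD8) -> _ConvertPlainToDrbd.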
13019 self.cfg.ReleaseDRBDMinors(instance.name)
13021 result.append(("disk_template", self.op.disk_template))
13023 assert instance.disk_template == self.op.disk_template, \
13024 ("Expected disk template '%s', found '%s'" %
13025 (self.op.disk_template, instance.disk_template))
13027 # Release node and resource locks if there are any (they might already have
13028 # been released during disk conversion)
13029 _ReleaseLocks(self, locking.LEVEL_NODE)
13030 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13032 # Apply NIC changes
13033 if self._new_nics is not None:
13034 instance.nics = self._new_nics
13035 result.extend(self._nic_chgdesc)
13038 if self.op.hvparams:
13039 instance.hvparams = self.hv_inst
13040 for key, val in self.op.hvparams.iteritems():
13041 result.append(("hv/%s" % key, val))
13044 if self.op.beparams:
13045 instance.beparams = self.be_inst
13046 for key, val in self.op.beparams.iteritems():
13047 result.append(("be/%s" % key, val))
13050 if self.op.os_name:
13051 instance.os = self.op.os_name
13054 if self.op.osparams:
13055 instance.osparams = self.os_inst
13056 for key, val in self.op.osparams.iteritems():
13057 result.append(("os/%s" % key, val))
13059 if self.op.offline is None:
13062 elif self.op.offline:
13063 # Mark instance as offline
13064 self.cfg.MarkInstanceOffline(instance.name)
13065 result.append(("admin_state", constants.ADMINST_OFFLINE))
13067 # Mark instance as online, but stopped
13068 self.cfg.MarkInstanceDown(instance.name)
13069 result.append(("admin_state", constants.ADMINST_DOWN))
13071 self.cfg.Update(instance, feedback_fn)
13073 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13074 self.owned_locks(locking.LEVEL_NODE)), \
13075 "All node locks should have been released by now"
13079 _DISK_CONVERSIONS = {
13080 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13081 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13085 class LUInstanceChangeGroup(LogicalUnit):
13086 HPATH = "instance-change-group"
13087 HTYPE = constants.HTYPE_INSTANCE
13090 def ExpandNames(self):
13091 self.share_locks = _ShareAll()
13092 self.needed_locks = {
13093 locking.LEVEL_NODEGROUP: [],
13094 locking.LEVEL_NODE: [],
13097 self._ExpandAndLockInstance()
13099 if self.op.target_groups:
13100 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13101 self.op.target_groups)
13103 self.req_target_uuids = None
13105 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13107 def DeclareLocks(self, level):
13108 if level == locking.LEVEL_NODEGROUP:
13109 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13111 if self.req_target_uuids:
13112 lock_groups = set(self.req_target_uuids)
13114 # Lock all groups used by instance optimistically; this requires going
13115 # via the node before it's locked, requiring verification later on
13116 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13117 lock_groups.update(instance_groups)
13119 # No target groups, need to lock all of them
13120 lock_groups = locking.ALL_SET
13122 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13124 elif level == locking.LEVEL_NODE:
13125 if self.req_target_uuids:
13126 # Lock all nodes used by instances
13127 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13128 self._LockInstancesNodes()
13130 # Lock all nodes in all potential target groups
13131 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13132 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13133 member_nodes = [node_name
13134 for group in lock_groups
13135 for node_name in self.cfg.GetNodeGroup(group).members]
13136 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13138 # Lock all nodes as all groups are potential targets
13139 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13141 def CheckPrereq(self):
13142 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13143 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13144 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13146 assert (self.req_target_uuids is None or
13147 owned_groups.issuperset(self.req_target_uuids))
13148 assert owned_instances == set([self.op.instance_name])
13150 # Get instance information
13151 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13153 # Check if node groups for locked instance are still correct
13154 assert owned_nodes.issuperset(self.instance.all_nodes), \
13155 ("Instance %s's nodes changed while we kept the lock" %
13156 self.op.instance_name)
13158 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13161 if self.req_target_uuids:
13162 # User requested specific target groups
13163 self.target_uuids = frozenset(self.req_target_uuids)
13165 # All groups except those used by the instance are potential targets
13166 self.target_uuids = owned_groups - inst_groups
13168 conflicting_groups = self.target_uuids & inst_groups
13169 if conflicting_groups:
13170 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13171 " used by the instance '%s'" %
13172 (utils.CommaJoin(conflicting_groups),
13173 self.op.instance_name),
13174 errors.ECODE_INVAL)
13176 if not self.target_uuids:
13177 raise errors.OpPrereqError("There are no possible target groups",
13178 errors.ECODE_INVAL)
13180 def BuildHooksEnv(self):
13181 """Build hooks env.
13184 assert self.target_uuids
13187 "TARGET_GROUPS": " ".join(self.target_uuids),
13190 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13194 def BuildHooksNodes(self):
13195 """Build hooks nodes.
13198 mn = self.cfg.GetMasterNode()
13199 return ([mn], [mn])
13201 def Exec(self, feedback_fn):
13202 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13204 assert instances == [self.op.instance_name], "Instance not locked"
13206 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13207 instances=instances, target_groups=list(self.target_uuids))
13209 ial.Run(self.op.iallocator)
13211 if not ial.success:
13212 raise errors.OpPrereqError("Can't compute solution for changing group of"
13213 " instance '%s' using iallocator '%s': %s" %
13214 (self.op.instance_name, self.op.iallocator,
13216 errors.ECODE_NORES)
13218 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13220 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13221 " instance '%s'", len(jobs), self.op.instance_name)
13223 return ResultWithJobs(jobs)
13226 class LUBackupQuery(NoHooksLU):
13227 """Query the exports list
13232 def CheckArguments(self):
13233 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13234 ["node", "export"], self.op.use_locking)
13236 def ExpandNames(self):
13237 self.expq.ExpandNames(self)
13239 def DeclareLocks(self, level):
13240 self.expq.DeclareLocks(self, level)
13242 def Exec(self, feedback_fn):
13245 for (node, expname) in self.expq.OldStyleQuery(self):
13246 if expname is None:
13247 result[node] = False
13249 result.setdefault(node, []).append(expname)
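# Illustrative shape of the old-style result built above (names invented):
#   {"node1.example.com": ["instance1.example.com"],
#    "node2.example.com": False}   # False means that node could not be queried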
13254 class _ExportQuery(_QueryBase):
13255 FIELDS = query.EXPORT_FIELDS
13257 #: The node name is not a unique key for this query
13258 SORT_FIELD = "node"
13260 def ExpandNames(self, lu):
13261 lu.needed_locks = {}
13263 # The following variables interact with _QueryBase._GetNames
13265 self.wanted = _GetWantedNodes(lu, self.names)
13267 self.wanted = locking.ALL_SET
13269 self.do_locking = self.use_locking
13271 if self.do_locking:
13272 lu.share_locks = _ShareAll()
13273 lu.needed_locks = {
13274 locking.LEVEL_NODE: self.wanted,
13277 def DeclareLocks(self, lu, level):
13280 def _GetQueryData(self, lu):
13281 """Computes the list of nodes and their attributes.
13284 # Locking is not used
13286 assert not (compat.any(lu.glm.is_owned(level)
13287 for level in locking.LEVELS
13288 if level != locking.LEVEL_CLUSTER) or
13289 self.do_locking or self.use_locking)
13291 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13295 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13297 result.append((node, None))
13299 result.extend((node, expname) for expname in nres.payload)
13304 class LUBackupPrepare(NoHooksLU):
13305 """Prepares an instance for an export and returns useful information.
13310 def ExpandNames(self):
13311 self._ExpandAndLockInstance()
13313 def CheckPrereq(self):
13314 """Check prerequisites.
13317 instance_name = self.op.instance_name
13319 self.instance = self.cfg.GetInstanceInfo(instance_name)
13320 assert self.instance is not None, \
13321 "Cannot retrieve locked instance %s" % self.op.instance_name
13322 _CheckNodeOnline(self, self.instance.primary_node)
13324 self._cds = _GetClusterDomainSecret()
13326 def Exec(self, feedback_fn):
13327 """Prepares an instance for an export.
13330 instance = self.instance
13332 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13333 salt = utils.GenerateSecret(8)
13335 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13336 result = self.rpc.call_x509_cert_create(instance.primary_node,
13337 constants.RIE_CERT_VALIDITY)
13338 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13340 (name, cert_pem) = result.payload
13342 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13346 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13347 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13349 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13355 class LUBackupExport(LogicalUnit):
13356 """Export an instance to an image in the cluster.
13359 HPATH = "instance-export"
13360 HTYPE = constants.HTYPE_INSTANCE
13363 def CheckArguments(self):
13364 """Check the arguments.
13367 self.x509_key_name = self.op.x509_key_name
13368 self.dest_x509_ca_pem = self.op.destination_x509_ca
13370 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13371 if not self.x509_key_name:
13372 raise errors.OpPrereqError("Missing X509 key name for encryption",
13373 errors.ECODE_INVAL)
13375 if not self.dest_x509_ca_pem:
13376 raise errors.OpPrereqError("Missing destination X509 CA",
13377 errors.ECODE_INVAL)
13379 def ExpandNames(self):
13380 self._ExpandAndLockInstance()
13382 # Lock all nodes for local exports
13383 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13384 # FIXME: lock only instance primary and destination node
13386 # Sad but true, for now we have to lock all nodes, as we don't know where
13387 # the previous export might be, and in this LU we search for it and
13388 # remove it from its current node. In the future we could fix this by:
13389 # - making a tasklet to search (share-lock all), then create the
13390 # new one, then one to remove, after
13391 # - removing the removal operation altogether
13392 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13394 def DeclareLocks(self, level):
13395 """Last minute lock declaration."""
13396 # All nodes are locked anyway, so nothing to do here.
13398 def BuildHooksEnv(self):
13399 """Build hooks env.
13401 This will run on the master, primary node and target node.
13405 "EXPORT_MODE": self.op.mode,
13406 "EXPORT_NODE": self.op.target_node,
13407 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13408 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13409 # TODO: Generic function for boolean env variables
13410 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13413 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13417 def BuildHooksNodes(self):
13418 """Build hooks nodes.
13421 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13423 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13424 nl.append(self.op.target_node)
13428 def CheckPrereq(self):
13429 """Check prerequisites.
13431 This checks that the instance and node names are valid.
13434 instance_name = self.op.instance_name
13436 self.instance = self.cfg.GetInstanceInfo(instance_name)
13437 assert self.instance is not None, \
13438 "Cannot retrieve locked instance %s" % self.op.instance_name
13439 _CheckNodeOnline(self, self.instance.primary_node)
13441 if (self.op.remove_instance and
13442 self.instance.admin_state == constants.ADMINST_UP and
13443 not self.op.shutdown):
13444 raise errors.OpPrereqError("Can not remove instance without shutting it"
13447 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13448 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13449 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13450 assert self.dst_node is not None
13452 _CheckNodeOnline(self, self.dst_node.name)
13453 _CheckNodeNotDrained(self, self.dst_node.name)
13456 self.dest_disk_info = None
13457 self.dest_x509_ca = None
13459 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13460 self.dst_node = None
13462 if len(self.op.target_node) != len(self.instance.disks):
13463 raise errors.OpPrereqError(("Received destination information for %s"
13464 " disks, but instance %s has %s disks") %
13465 (len(self.op.target_node), instance_name,
13466 len(self.instance.disks)),
13467 errors.ECODE_INVAL)
13469 cds = _GetClusterDomainSecret()
13471 # Check X509 key name
13473 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13474 except (TypeError, ValueError), err:
13475 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13477 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13478 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13479 errors.ECODE_INVAL)
13481 # Load and verify CA
13483 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13484 except OpenSSL.crypto.Error, err:
13485 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13486 (err, ), errors.ECODE_INVAL)
13488 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13489 if errcode is not None:
13490 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13491 (msg, ), errors.ECODE_INVAL)
13493 self.dest_x509_ca = cert
13495 # Verify target information
13497 for idx, disk_data in enumerate(self.op.target_node):
13499 (host, port, magic) = \
13500 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13501 except errors.GenericError, err:
13502 raise errors.OpPrereqError("Target info for disk %s: %s" %
13503 (idx, err), errors.ECODE_INVAL)
13505 disk_info.append((host, port, magic))
13507 assert len(disk_info) == len(self.op.target_node)
13508 self.dest_disk_info = disk_info
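# dest_disk_info now holds one (host, port, magic) triple per instance
# disk, as decoded above; helper.RemoteExport() consumes it in Exec().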
13511 raise errors.ProgrammerError("Unhandled export mode %r" %
13514 # instance disk type verification
13515 # TODO: Implement export support for file-based disks
13516 for disk in self.instance.disks:
13517 if disk.dev_type == constants.LD_FILE:
13518 raise errors.OpPrereqError("Export not supported for instances with"
13519 " file-based disks", errors.ECODE_INVAL)
13521 def _CleanupExports(self, feedback_fn):
13522 """Removes exports of current instance from all other nodes.
13524 If an instance in a cluster with nodes A..D was exported to node C, its
13525 exports will be removed from the nodes A, B and D.
13528 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13530 nodelist = self.cfg.GetNodeList()
13531 nodelist.remove(self.dst_node.name)
13533 # on one-node clusters nodelist will be empty after the removal;
13534 # if we proceeded, the backup would be removed because OpBackupQuery
13535 # substitutes an empty list with the full cluster node list.
13536 iname = self.instance.name
13538 feedback_fn("Removing old exports for instance %s" % iname)
13539 exportlist = self.rpc.call_export_list(nodelist)
13540 for node in exportlist:
13541 if exportlist[node].fail_msg:
13543 if iname in exportlist[node].payload:
13544 msg = self.rpc.call_export_remove(node, iname).fail_msg
13546 self.LogWarning("Could not remove older export for instance %s"
13547 " on node %s: %s", iname, node, msg)
13549 def Exec(self, feedback_fn):
13550 """Export an instance to an image in the cluster.
13553 assert self.op.mode in constants.EXPORT_MODES
13555 instance = self.instance
13556 src_node = instance.primary_node
13558 if self.op.shutdown:
13559 # shutdown the instance, but not the disks
13560 feedback_fn("Shutting down instance %s" % instance.name)
13561 result = self.rpc.call_instance_shutdown(src_node, instance,
13562 self.op.shutdown_timeout)
13563 # TODO: Maybe ignore failures if ignore_remove_failures is set
13564 result.Raise("Could not shutdown instance %s on"
13565 " node %s" % (instance.name, src_node))
13567 # set the disks ID correctly since call_instance_start needs the
13568 # correct drbd minor to create the symlinks
13569 for disk in instance.disks:
13570 self.cfg.SetDiskID(disk, src_node)
13572 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13575 # Activate the instance disks if we're exporting a stopped instance
13576 feedback_fn("Activating disks for %s" % instance.name)
13577 _StartInstanceDisks(self, instance, None)
13580 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13583 helper.CreateSnapshots()
13585 if (self.op.shutdown and
13586 instance.admin_state == constants.ADMINST_UP and
13587 not self.op.remove_instance):
13588 assert not activate_disks
13589 feedback_fn("Starting instance %s" % instance.name)
13590 result = self.rpc.call_instance_start(src_node,
13591 (instance, None, None), False)
13592 msg = result.fail_msg
13594 feedback_fn("Failed to start instance: %s" % msg)
13595 _ShutdownInstanceDisks(self, instance)
13596 raise errors.OpExecError("Could not start instance: %s" % msg)
13598 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13599 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13600 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13601 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13602 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13604 (key_name, _, _) = self.x509_key_name
13607 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13610 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13611 key_name, dest_ca_pem,
13616 # Check for backwards compatibility
13617 assert len(dresults) == len(instance.disks)
13618 assert compat.all(isinstance(i, bool) for i in dresults), \
13619 "Not all results are boolean: %r" % dresults
13623 feedback_fn("Deactivating disks for %s" % instance.name)
13624 _ShutdownInstanceDisks(self, instance)
13626 if not (compat.all(dresults) and fin_resu):
13629 failures.append("export finalization")
13630 if not compat.all(dresults):
13631 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13633 failures.append("disk export: disk(s) %s" % fdsk)
13635 raise errors.OpExecError("Export failed, errors in %s" %
13636 utils.CommaJoin(failures))
13638 # At this point, the export was successful, we can cleanup/finish
13640 # Remove instance if requested
13641 if self.op.remove_instance:
13642 feedback_fn("Removing instance %s" % instance.name)
13643 _RemoveInstance(self, feedback_fn, instance,
13644 self.op.ignore_remove_failures)
13646 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13647 self._CleanupExports(feedback_fn)
13649 return fin_resu, dresults
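# Illustrative return value (invented): (True, [True, True]) -- the overall
# export status plus one boolean per instance disk, as asserted above.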
13652 class LUBackupRemove(NoHooksLU):
13653 """Remove exports related to the named instance.
13658 def ExpandNames(self):
13659 self.needed_locks = {}
13660 # We need all nodes to be locked in order for RemoveExport to work, but we
13661 # don't need to lock the instance itself, as nothing will happen to it (and
13662 # we can remove exports also for a removed instance)
13663 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13665 def Exec(self, feedback_fn):
13666 """Remove any export.
13669 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13670 # If the instance was not found we'll try with the name that was passed in.
13671 # This will only work if it was an FQDN, though.
13673 if not instance_name:
13675 instance_name = self.op.instance_name
13677 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13678 exportlist = self.rpc.call_export_list(locked_nodes)
13680 for node in exportlist:
13681 msg = exportlist[node].fail_msg
13683 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13685 if instance_name in exportlist[node].payload:
13687 result = self.rpc.call_export_remove(node, instance_name)
13688 msg = result.fail_msg
13690 logging.error("Could not remove export for instance %s"
13691 " on node %s: %s", instance_name, node, msg)
13693 if fqdn_warn and not found:
13694 feedback_fn("Export not found. If trying to remove an export belonging"
13695 " to a deleted instance please use its Fully Qualified"
13699 class LUGroupAdd(LogicalUnit):
13700 """Logical unit for creating node groups.
13703 HPATH = "group-add"
13704 HTYPE = constants.HTYPE_GROUP
13707 def ExpandNames(self):
13708 # We need the new group's UUID here so that we can create and acquire the
13709 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13710 # that it should not check whether the UUID exists in the configuration.
13711 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13712 self.needed_locks = {}
13713 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13715 def CheckPrereq(self):
13716 """Check prerequisites.
13718 This checks that the given group name is not an existing node group
13723 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13724 except errors.OpPrereqError:
13727 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13728 " node group (UUID: %s)" %
13729 (self.op.group_name, existing_uuid),
13730 errors.ECODE_EXISTS)
13732 if self.op.ndparams:
13733 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13735 if self.op.hv_state:
13736 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13738 self.new_hv_state = None
13740 if self.op.disk_state:
13741 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13743 self.new_disk_state = None
13745 if self.op.diskparams:
13746 for templ in constants.DISK_TEMPLATES:
13747 if templ in self.op.diskparams:
13748 utils.ForceDictType(self.op.diskparams[templ],
13749 constants.DISK_DT_TYPES)
13750 self.new_diskparams = self.op.diskparams
13752 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13753 except errors.OpPrereqError, err:
13754 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13755 errors.ECODE_INVAL)
13757 self.new_diskparams = {}
13759 if self.op.ipolicy:
13760 cluster = self.cfg.GetClusterInfo()
13761 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13763 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13764 except errors.ConfigurationError, err:
13765 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13766 errors.ECODE_INVAL)
13768 def BuildHooksEnv(self):
13769 """Build hooks env.
13773 "GROUP_NAME": self.op.group_name,
13776 def BuildHooksNodes(self):
13777 """Build hooks nodes.
13780 mn = self.cfg.GetMasterNode()
13781 return ([mn], [mn])
13783 def Exec(self, feedback_fn):
13784 """Add the node group to the cluster.
13787 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13788 uuid=self.group_uuid,
13789 alloc_policy=self.op.alloc_policy,
13790 ndparams=self.op.ndparams,
13791 diskparams=self.new_diskparams,
13792 ipolicy=self.op.ipolicy,
13793 hv_state_static=self.new_hv_state,
13794 disk_state_static=self.new_disk_state)
13796 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13797 del self.remove_locks[locking.LEVEL_NODEGROUP]
13800 class LUGroupAssignNodes(NoHooksLU):
13801 """Logical unit for assigning nodes to groups.
13806 def ExpandNames(self):
13807 # These raise errors.OpPrereqError on their own:
13808 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13809 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13811 # We want to lock all the affected nodes and groups. We have readily
13812 # available the list of nodes, and the *destination* group. To gather the
13813 # list of "source" groups, we need to fetch node information later on.
13814 self.needed_locks = {
13815 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13816 locking.LEVEL_NODE: self.op.nodes,
13819 def DeclareLocks(self, level):
13820 if level == locking.LEVEL_NODEGROUP:
13821 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13823 # Try to get all affected nodes' groups without having the group or node
13824 # lock yet. Needs verification later in the code flow.
13825 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13827 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13829 def CheckPrereq(self):
13830 """Check prerequisites.
13833 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13834 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13835 frozenset(self.op.nodes))
13837 expected_locks = (set([self.group_uuid]) |
13838 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13839 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13840 if actual_locks != expected_locks:
13841 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13842 " current groups are '%s', used to be '%s'" %
13843 (utils.CommaJoin(expected_locks),
13844 utils.CommaJoin(actual_locks)))
13846 self.node_data = self.cfg.GetAllNodesInfo()
13847 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13848 instance_data = self.cfg.GetAllInstancesInfo()
13850 if self.group is None:
13851 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13852 (self.op.group_name, self.group_uuid))
13854 (new_splits, previous_splits) = \
13855 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13856 for node in self.op.nodes],
13857 self.node_data, instance_data)
13860 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13862 if not self.op.force:
13863 raise errors.OpExecError("The following instances get split by this"
13864 " change and --force was not given: %s" %
13867 self.LogWarning("This operation will split the following instances: %s",
13870 if previous_splits:
13871 self.LogWarning("In addition, these already-split instances continue"
13872 " to be split across groups: %s",
13873 utils.CommaJoin(utils.NiceSort(previous_splits)))
13875 def Exec(self, feedback_fn):
13876 """Assign nodes to a new group.
13879 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13881 self.cfg.AssignGroupNodes(mods)
13884 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13885 """Check for split instances after a node assignment.
13887 This method considers a series of node assignments as an atomic operation,
13888 and returns information about split instances after applying the set of
13891 In particular, it returns information about newly split instances, and
13892 about instances that were already split and remain so after the change.
13894 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13897 @type changes: list of (node_name, new_group_uuid) pairs.
13898 @param changes: list of node assignments to consider.
13899 @param node_data: a dict with data for all nodes
13900 @param instance_data: a dict with all instances to consider
13901 @rtype: a two-tuple
13902 @return: a list of instances that were previously okay and end up split as a
13903 consequence of this change, and a list of instances that were previously
13904 split and that this change does not fix.
13907 changed_nodes = dict((node, group) for node, group in changes
13908 if node_data[node].group != group)
13910 all_split_instances = set()
13911 previously_split_instances = set()
13913 def InstanceNodes(instance):
13914 return [instance.primary_node] + list(instance.secondary_nodes)
13916 for inst in instance_data.values():
13917 if inst.disk_template not in constants.DTS_INT_MIRROR:
13920 instance_nodes = InstanceNodes(inst)
13922 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13923 previously_split_instances.add(inst.name)
13925 if len(set(changed_nodes.get(node, node_data[node].group)
13926 for node in instance_nodes)) > 1:
13927 all_split_instances.add(inst.name)
13929 return (list(all_split_instances - previously_split_instances),
13930 list(previously_split_instances & all_split_instances))
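# Small worked example (all names invented): with an instance mirrored on
# ["nodeA", "nodeB"], node_data reporting both nodes in group "g1" and
# changes = [("nodeB", "g2")], the instance would span g1 and g2 after the
# assignment and is reported in the first list (newly split). If its nodes
# had already been in different groups beforehand, it would instead appear
# in the second list.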
13933 class _GroupQuery(_QueryBase):
13934 FIELDS = query.GROUP_FIELDS
13936 def ExpandNames(self, lu):
13937 lu.needed_locks = {}
13939 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13940 self._cluster = lu.cfg.GetClusterInfo()
13941 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13944 self.wanted = [name_to_uuid[name]
13945 for name in utils.NiceSort(name_to_uuid.keys())]
13947 # Accept names to be either names or UUIDs.
13950 all_uuid = frozenset(self._all_groups.keys())
13952 for name in self.names:
13953 if name in all_uuid:
13954 self.wanted.append(name)
13955 elif name in name_to_uuid:
13956 self.wanted.append(name_to_uuid[name])
13958 missing.append(name)
13961 raise errors.OpPrereqError("Some groups do not exist: %s" %
13962 utils.CommaJoin(missing),
13963 errors.ECODE_NOENT)
13965 def DeclareLocks(self, lu, level):
13968 def _GetQueryData(self, lu):
13969 """Computes the list of node groups and their attributes.
13972 do_nodes = query.GQ_NODE in self.requested_data
13973 do_instances = query.GQ_INST in self.requested_data
13975 group_to_nodes = None
13976 group_to_instances = None
13978 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13979 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13980 # latter GetAllInstancesInfo() is not enough, for we have to go through
13981 # instance->node. Hence, we will need to process nodes even if we only need
13982 # instance information.
13983 if do_nodes or do_instances:
13984 all_nodes = lu.cfg.GetAllNodesInfo()
13985 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13988 for node in all_nodes.values():
13989 if node.group in group_to_nodes:
13990 group_to_nodes[node.group].append(node.name)
13991 node_to_group[node.name] = node.group
13994 all_instances = lu.cfg.GetAllInstancesInfo()
13995 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13997 for instance in all_instances.values():
13998 node = instance.primary_node
13999 if node in node_to_group:
14000 group_to_instances[node_to_group[node]].append(instance.name)
14003 # Do not pass on node information if it was not requested.
14004 group_to_nodes = None
14006 return query.GroupQueryData(self._cluster,
14007 [self._all_groups[uuid]
14008 for uuid in self.wanted],
14009 group_to_nodes, group_to_instances,
14010 query.GQ_DISKPARAMS in self.requested_data)
14013 class LUGroupQuery(NoHooksLU):
14014 """Logical unit for querying node groups.
14019 def CheckArguments(self):
14020 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14021 self.op.output_fields, False)
14023 def ExpandNames(self):
14024 self.gq.ExpandNames(self)
14026 def DeclareLocks(self, level):
14027 self.gq.DeclareLocks(self, level)
14029 def Exec(self, feedback_fn):
14030 return self.gq.OldStyleQuery(self)
14033 class LUGroupSetParams(LogicalUnit):
14034 """Modifies the parameters of a node group.
14037 HPATH = "group-modify"
14038 HTYPE = constants.HTYPE_GROUP
14041 def CheckArguments(self):
14044 self.op.diskparams,
14045 self.op.alloc_policy,
14047 self.op.disk_state,
14051 if all_changes.count(None) == len(all_changes):
14052 raise errors.OpPrereqError("Please pass at least one modification",
14053 errors.ECODE_INVAL)
14055 def ExpandNames(self):
14056 # This raises errors.OpPrereqError on its own:
14057 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14059 self.needed_locks = {
14060 locking.LEVEL_INSTANCE: [],
14061 locking.LEVEL_NODEGROUP: [self.group_uuid],
14064 self.share_locks[locking.LEVEL_INSTANCE] = 1
14066 def DeclareLocks(self, level):
14067 if level == locking.LEVEL_INSTANCE:
14068 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14070 # Lock instances optimistically, needs verification once group lock has
14072 self.needed_locks[locking.LEVEL_INSTANCE] = \
14073 self.cfg.GetNodeGroupInstances(self.group_uuid)
14076 def _UpdateAndVerifyDiskParams(old, new):
14077 """Updates and verifies disk parameters.
14080 new_params = _GetUpdatedParams(old, new)
14081 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14084 def CheckPrereq(self):
14085 """Check prerequisites.
14088 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14090 # Check if locked instances are still correct
14091 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14093 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14094 cluster = self.cfg.GetClusterInfo()
14096 if self.group is None:
14097 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14098 (self.op.group_name, self.group_uuid))
14100 if self.op.ndparams:
14101 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14102 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14103 self.new_ndparams = new_ndparams
14105 if self.op.diskparams:
14106 diskparams = self.group.diskparams
14107 uavdp = self._UpdateAndVerifyDiskParams
14108 # For each disk template subdict, update and verify the values
14109 new_diskparams = dict((dt,
14110 uavdp(diskparams.get(dt, {}),
14111 self.op.diskparams[dt]))
14112 for dt in constants.DISK_TEMPLATES
14113 if dt in self.op.diskparams)
14114 # Now that we have all subdicts of diskparams ready, let's merge the
14115 # actual dict with all updated subdicts
14116 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14118 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14119 except errors.OpPrereqError, err:
14120 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14121 errors.ECODE_INVAL)
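# Rough sketch of the per-template merge above (values invented):
#   group diskparams : {"drbd": {"resync-rate": 61440}}
#   op.diskparams    : {"drbd": {"metavg": "othervg"}}
#   new_diskparams   : {"drbd": {"resync-rate": 61440, "metavg": "othervg"}}
# i.e. only the templates mentioned in the opcode are touched, and within
# each of them only the given keys are overridden.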
14123 if self.op.hv_state:
14124 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14125 self.group.hv_state_static)
14127 if self.op.disk_state:
14128 self.new_disk_state = \
14129 _MergeAndVerifyDiskState(self.op.disk_state,
14130 self.group.disk_state_static)
14132 if self.op.ipolicy:
14133 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14137 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14138 inst_filter = lambda inst: inst.name in owned_instances
14139 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14141 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14143 new_ipolicy, instances)
14146 self.LogWarning("After the ipolicy change the following instances"
14147 " violate them: %s",
14148 utils.CommaJoin(violations))
14150 def BuildHooksEnv(self):
14151 """Build hooks env.
14155 "GROUP_NAME": self.op.group_name,
14156 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14159 def BuildHooksNodes(self):
14160 """Build hooks nodes.
14163 mn = self.cfg.GetMasterNode()
14164 return ([mn], [mn])
14166 def Exec(self, feedback_fn):
14167 """Modifies the node group.
14172 if self.op.ndparams:
14173 self.group.ndparams = self.new_ndparams
14174 result.append(("ndparams", str(self.group.ndparams)))
14176 if self.op.diskparams:
14177 self.group.diskparams = self.new_diskparams
14178 result.append(("diskparams", str(self.group.diskparams)))
14180 if self.op.alloc_policy:
14181 self.group.alloc_policy = self.op.alloc_policy
14183 if self.op.hv_state:
14184 self.group.hv_state_static = self.new_hv_state
14186 if self.op.disk_state:
14187 self.group.disk_state_static = self.new_disk_state
14189 if self.op.ipolicy:
14190 self.group.ipolicy = self.new_ipolicy
14192 self.cfg.Update(self.group, feedback_fn)
14196 class LUGroupRemove(LogicalUnit):
14197 HPATH = "group-remove"
14198 HTYPE = constants.HTYPE_GROUP
14201 def ExpandNames(self):
14202 # This raises errors.OpPrereqError on its own:
14203 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14204 self.needed_locks = {
14205 locking.LEVEL_NODEGROUP: [self.group_uuid],
14208 def CheckPrereq(self):
14209 """Check prerequisites.
14211 This checks that the given group name exists as a node group, that it is
14212 empty (i.e., contains no nodes), and that it is not the last group of the
14216 # Verify that the group is empty.
14217 group_nodes = [node.name
14218 for node in self.cfg.GetAllNodesInfo().values()
14219 if node.group == self.group_uuid]
14222 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14224 (self.op.group_name,
14225 utils.CommaJoin(utils.NiceSort(group_nodes))),
14226 errors.ECODE_STATE)
14228 # Verify the cluster would not be left group-less.
14229 if len(self.cfg.GetNodeGroupList()) == 1:
14230 raise errors.OpPrereqError("Group '%s' is the only group,"
14231 " cannot be removed" %
14232 self.op.group_name,
14233 errors.ECODE_STATE)
14235 def BuildHooksEnv(self):
14236 """Build hooks env.
14240 "GROUP_NAME": self.op.group_name,
14243 def BuildHooksNodes(self):
14244 """Build hooks nodes.
14247 mn = self.cfg.GetMasterNode()
14248 return ([mn], [mn])
14250 def Exec(self, feedback_fn):
14251 """Remove the node group.
14255 self.cfg.RemoveNodeGroup(self.group_uuid)
14256 except errors.ConfigurationError:
14257 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14258 (self.op.group_name, self.group_uuid))
14260 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14263 class LUGroupRename(LogicalUnit):
14264 HPATH = "group-rename"
14265 HTYPE = constants.HTYPE_GROUP
14268 def ExpandNames(self):
14269 # This raises errors.OpPrereqError on its own:
14270 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14272 self.needed_locks = {
14273 locking.LEVEL_NODEGROUP: [self.group_uuid],
14276 def CheckPrereq(self):
14277 """Check prerequisites.
14279 Ensures requested new name is not yet used.
14283 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14284 except errors.OpPrereqError:
14287 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14288 " node group (UUID: %s)" %
14289 (self.op.new_name, new_name_uuid),
14290 errors.ECODE_EXISTS)
14292 def BuildHooksEnv(self):
14293 """Build hooks env.
14297 "OLD_NAME": self.op.group_name,
14298 "NEW_NAME": self.op.new_name,
14301 def BuildHooksNodes(self):
14302 """Build hooks nodes.
14305 mn = self.cfg.GetMasterNode()
14307 all_nodes = self.cfg.GetAllNodesInfo()
14308 all_nodes.pop(mn, None)
14311 run_nodes.extend(node.name for node in all_nodes.values()
14312 if node.group == self.group_uuid)
14314 return (run_nodes, run_nodes)
14316 def Exec(self, feedback_fn):
14317 """Rename the node group.
14320 group = self.cfg.GetNodeGroup(self.group_uuid)
14323 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14324 (self.op.group_name, self.group_uuid))
14326 group.name = self.op.new_name
14327 self.cfg.Update(group, feedback_fn)
14329 return self.op.new_name
14332 class LUGroupEvacuate(LogicalUnit):
14333 HPATH = "group-evacuate"
14334 HTYPE = constants.HTYPE_GROUP
14337 def ExpandNames(self):
14338 # This raises errors.OpPrereqError on its own:
14339 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14341 if self.op.target_groups:
14342 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14343 self.op.target_groups)
14345 self.req_target_uuids = []
14347 if self.group_uuid in self.req_target_uuids:
14348 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14349 " as a target group (targets are %s)" %
14351 utils.CommaJoin(self.req_target_uuids)),
14352 errors.ECODE_INVAL)
14354 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14356 self.share_locks = _ShareAll()
14357 self.needed_locks = {
14358 locking.LEVEL_INSTANCE: [],
14359 locking.LEVEL_NODEGROUP: [],
14360 locking.LEVEL_NODE: [],
14363 def DeclareLocks(self, level):
14364 if level == locking.LEVEL_INSTANCE:
14365 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14367 # Lock instances optimistically, needs verification once node and group
14368 # locks have been acquired
14369 self.needed_locks[locking.LEVEL_INSTANCE] = \
14370 self.cfg.GetNodeGroupInstances(self.group_uuid)
14372 elif level == locking.LEVEL_NODEGROUP:
14373 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14375 if self.req_target_uuids:
14376 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14378 # Lock all groups used by instances optimistically; this requires going
14379 # via the node before it's locked, requiring verification later on
14380 lock_groups.update(group_uuid
14381 for instance_name in
14382 self.owned_locks(locking.LEVEL_INSTANCE)
14384 self.cfg.GetInstanceNodeGroups(instance_name))
14386 # No target groups, need to lock all of them
14387 lock_groups = locking.ALL_SET
14389 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14391 elif level == locking.LEVEL_NODE:
14392 # This will only lock the nodes in the group to be evacuated which
14393 # contain actual instances
14394 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14395 self._LockInstancesNodes()
14397 # Lock all nodes in group to be evacuated and target groups
14398 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14399 assert self.group_uuid in owned_groups
14400 member_nodes = [node_name
14401 for group in owned_groups
14402 for node_name in self.cfg.GetNodeGroup(group).members]
14403 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14405 def CheckPrereq(self):
14406 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14407 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14408 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14410 assert owned_groups.issuperset(self.req_target_uuids)
14411 assert self.group_uuid in owned_groups
14413 # Check if locked instances are still correct
14414 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14416 # Get instance information
14417 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14419 # Check if node groups for locked instances are still correct
14420 _CheckInstancesNodeGroups(self.cfg, self.instances,
14421 owned_groups, owned_nodes, self.group_uuid)
14423 if self.req_target_uuids:
14424 # User requested specific target groups
14425 self.target_uuids = self.req_target_uuids
14427 # All groups except the one to be evacuated are potential targets
14428 self.target_uuids = [group_uuid for group_uuid in owned_groups
14429 if group_uuid != self.group_uuid]
14431 if not self.target_uuids:
14432 raise errors.OpPrereqError("There are no possible target groups",
14433 errors.ECODE_INVAL)
14435 def BuildHooksEnv(self):
14436 """Build hooks env.
14440 "GROUP_NAME": self.op.group_name,
14441 "TARGET_GROUPS": " ".join(self.target_uuids),
14444 def BuildHooksNodes(self):
14445 """Build hooks nodes.
14448 mn = self.cfg.GetMasterNode()
14450 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14452 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14454 return (run_nodes, run_nodes)
14456 def Exec(self, feedback_fn):
14457 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14459 assert self.group_uuid not in self.target_uuids
14461 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14462 instances=instances, target_groups=self.target_uuids)
14464 ial.Run(self.op.iallocator)
14466 if not ial.success:
14467 raise errors.OpPrereqError("Can't compute group evacuation using"
14468 " iallocator '%s': %s" %
14469 (self.op.iallocator, ial.info),
14470 errors.ECODE_NORES)
14472 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14474 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14475 len(jobs), self.op.group_name)
14477 return ResultWithJobs(jobs)
14480 class TagsLU(NoHooksLU): # pylint: disable=W0223
14481 """Generic tags LU.
14483 This is an abstract class which is the parent of all the other tags LUs.
14486 def ExpandNames(self):
14487 self.group_uuid = None
14488 self.needed_locks = {}
14490 if self.op.kind == constants.TAG_NODE:
14491 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14492 lock_level = locking.LEVEL_NODE
14493 lock_name = self.op.name
14494 elif self.op.kind == constants.TAG_INSTANCE:
14495 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14496 lock_level = locking.LEVEL_INSTANCE
14497 lock_name = self.op.name
14498 elif self.op.kind == constants.TAG_NODEGROUP:
14499 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14500 lock_level = locking.LEVEL_NODEGROUP
14501 lock_name = self.group_uuid
14506 if lock_level and getattr(self.op, "use_locking", True):
14507 self.needed_locks[lock_level] = lock_name
14509 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14510 # not possible to acquire the BGL based on opcode parameters)
14512 def CheckPrereq(self):
14513 """Check prerequisites.
14516 if self.op.kind == constants.TAG_CLUSTER:
14517 self.target = self.cfg.GetClusterInfo()
14518 elif self.op.kind == constants.TAG_NODE:
14519 self.target = self.cfg.GetNodeInfo(self.op.name)
14520 elif self.op.kind == constants.TAG_INSTANCE:
14521 self.target = self.cfg.GetInstanceInfo(self.op.name)
14522 elif self.op.kind == constants.TAG_NODEGROUP:
14523 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14525 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14526 str(self.op.kind), errors.ECODE_INVAL)
14529 class LUTagsGet(TagsLU):
14530 """Returns the tags of a given object.
14535 def ExpandNames(self):
14536 TagsLU.ExpandNames(self)
14538 # Share locks as this is only a read operation
14539 self.share_locks = _ShareAll()
14541 def Exec(self, feedback_fn):
14542 """Returns the tag list.
14544 """
14545 return list(self.target.GetTags())
14548 class LUTagsSearch(NoHooksLU):
14549 """Searches the tags for a given pattern.
14551 """
14554 def ExpandNames(self):
14555 self.needed_locks = {}
14557 def CheckPrereq(self):
14558 """Check prerequisites.
14560 This checks the pattern passed for validity by compiling it.
14562 """
14563 try:
14564 self.re = re.compile(self.op.pattern)
14565 except re.error, err:
14566 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14567 (self.op.pattern, err), errors.ECODE_INVAL)
14569 def Exec(self, feedback_fn):
14570 """Returns the tag list.
14572 """
14573 cfg = self.cfg
14574 tgts = [("/cluster", cfg.GetClusterInfo())]
14575 ilist = cfg.GetAllInstancesInfo().values()
14576 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14577 nlist = cfg.GetAllNodesInfo().values()
14578 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14579 tgts.extend(("/nodegroup/%s" % n.name, n)
14580 for n in cfg.GetAllNodeGroupsInfo().values())
14581 results = []
14582 for path, target in tgts:
14583 for tag in target.GetTags():
14584 if self.re.search(tag):
14585 results.append((path, tag))
14587 return results
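# A minimal standalone sketch of the search semantics above (hypothetical tags,
# not part of the LU):
#
#   import re
#   tagged = {"/cluster": ["prod"], "/instances/web1": ["prod:web", "test"]}
#   pattern = re.compile("^prod")
#   hits = [(path, tag) for (path, tags) in tagged.items()
#           for tag in tags if pattern.search(tag)]
#   # both "prod" tags match, "test" does not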
14589 class LUTagsSet(TagsLU):
14590 """Sets a tag on a given object.
14592 """
14595 def CheckPrereq(self):
14596 """Check prerequisites.
14598 This checks the type and length of the tag name and value.
14600 """
14601 TagsLU.CheckPrereq(self)
14602 for tag in self.op.tags:
14603 objects.TaggableObject.ValidateTag(tag)
14605 def Exec(self, feedback_fn):
14606 """Sets the tags.
14608 """
14609 try:
14610 for tag in self.op.tags:
14611 self.target.AddTag(tag)
14612 except errors.TagError, err:
14613 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14614 self.cfg.Update(self.target, feedback_fn)
14617 class LUTagsDel(TagsLU):
14618 """Delete a list of tags from a given object.
14620 """
14623 def CheckPrereq(self):
14624 """Check prerequisites.
14626 This checks that we have the given tag.
14628 """
14629 TagsLU.CheckPrereq(self)
14630 for tag in self.op.tags:
14631 objects.TaggableObject.ValidateTag(tag)
14632 del_tags = frozenset(self.op.tags)
14633 cur_tags = self.target.GetTags()
14635 diff_tags = del_tags - cur_tags
14636 if diff_tags:
14637 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14638 raise errors.OpPrereqError("Tag(s) %s not found" %
14639 (utils.CommaJoin(diff_names), ),
14640 errors.ECODE_NOENT)
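# Worked sketch of the check above (hypothetical tags): with op.tags = ["old", "web"]
# and current tags {"web", "prod"}, diff_tags is frozenset(["old"]) and the LU fails
# with "Tag(s) 'old' not found"; with op.tags = ["web"] the difference is empty and
# Exec proceeds to remove the tag.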
14642 def Exec(self, feedback_fn):
14643 """Remove the tag from the object.
14645 """
14646 for tag in self.op.tags:
14647 self.target.RemoveTag(tag)
14648 self.cfg.Update(self.target, feedback_fn)
14651 class LUTestDelay(NoHooksLU):
14652 """Sleep for a specified amount of time.
14654 This LU sleeps on the master and/or nodes for a specified amount of
14655 time.
14657 """
14660 def ExpandNames(self):
14661 """Expand names and set required locks.
14663 This expands the node list, if any.
14665 """
14666 self.needed_locks = {}
14667 if self.op.on_nodes:
14668 # _GetWantedNodes can be used here, but is not always appropriate to use
14669 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14670 # more information.
14671 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14672 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14674 def _TestDelay(self):
14675 """Do the actual sleep.
14677 """
14678 if self.op.on_master:
14679 if not utils.TestDelay(self.op.duration):
14680 raise errors.OpExecError("Error during master delay test")
14681 if self.op.on_nodes:
14682 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14683 for node, node_result in result.items():
14684 node_result.Raise("Failure during rpc call to node %s" % node)
14686 def Exec(self, feedback_fn):
14687 """Execute the test delay opcode, with the wanted repetitions.
14689 """
14690 if self.op.repeat == 0:
14691 self._TestDelay()
14692 else:
14693 top_value = self.op.repeat - 1
14694 for i in range(self.op.repeat):
14695 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14696 self._TestDelay()
14699 class LUTestJqueue(NoHooksLU):
14700 """Utility LU to test some aspects of the job queue.
14702 """
14705 # Must be lower than default timeout for WaitForJobChange to see whether it
14706 # notices changed jobs
14707 _CLIENT_CONNECT_TIMEOUT = 20.0
14708 _CLIENT_CONFIRM_TIMEOUT = 60.0
14710 @classmethod
14711 def _NotifyUsingSocket(cls, cb, errcls):
14712 """Opens a Unix socket and waits for another program to connect.
14715 @param cb: Callback to send socket name to client
14716 @type errcls: class
14717 @param errcls: Exception class to use for errors
14719 """
14720 # Using a temporary directory as there's no easy way to create temporary
14721 # sockets without writing a custom loop around tempfile.mktemp and
14723 tmpdir = tempfile.mkdtemp()
14725 tmpsock = utils.PathJoin(tmpdir, "sock")
14727 logging.debug("Creating temporary socket at %s", tmpsock)
14728 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14733 # Send details to client
14736 # Wait for client to connect before continuing
14737 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14739 (conn, _) = sock.accept()
14740 except socket.error, err:
14741 raise errcls("Client didn't connect in time (%s)" % err)
14745 # Remove as soon as client is connected
14746 shutil.rmtree(tmpdir)
14748 # Wait for client to close
14751 # pylint: disable=E1101
14752 # Instance of '_socketobject' has no ... member
14753 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14755 except socket.error, err:
14756 raise errcls("Client failed to confirm notification (%s)" % err)
14760 def _SendNotification(self, test, arg, sockname):
14761 """Sends a notification to the client.
14764 @param test: Test name
14765 @param arg: Test argument (depends on test)
14766 @type sockname: string
14767 @param sockname: Socket path
14769 """
14770 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14772 def _Notify(self, prereq, test, arg):
14773 """Notifies the client of a test.
14776 @param prereq: Whether this is a prereq-phase test
14778 @param test: Test name
14779 @param arg: Test argument (depends on test)
14781 """
14782 if prereq:
14783 errcls = errors.OpPrereqError
14784 else:
14785 errcls = errors.OpExecError
14787 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14788 test, arg), errcls)
14791 def CheckArguments(self):
14792 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14793 self.expandnames_calls = 0
14795 def ExpandNames(self):
14796 checkargs_calls = getattr(self, "checkargs_calls", 0)
14797 if checkargs_calls < 1:
14798 raise errors.ProgrammerError("CheckArguments was not called")
14800 self.expandnames_calls += 1
14802 if self.op.notify_waitlock:
14803 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14805 self.LogInfo("Expanding names")
14807 # Get lock on master node (just to get a lock, not for a particular reason)
14808 self.needed_locks = {
14809 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14810 }
14812 def Exec(self, feedback_fn):
14813 if self.expandnames_calls < 1:
14814 raise errors.ProgrammerError("ExpandNames was not called")
14816 if self.op.notify_exec:
14817 self._Notify(False, constants.JQT_EXEC, None)
14819 self.LogInfo("Executing")
14821 if self.op.log_messages:
14822 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14823 for idx, msg in enumerate(self.op.log_messages):
14824 self.LogInfo("Sending log message %s", idx + 1)
14825 feedback_fn(constants.JQT_MSGPREFIX + msg)
14826 # Report how many test messages have been sent
14827 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14829 if self.op.fail:
14830 raise errors.OpExecError("Opcode failure was requested")
14832 return True
14835 class IAllocator(object):
14836 """IAllocator framework.
14838 An IAllocator instance has four sets of attributes:
14839 - cfg that is needed to query the cluster
14840 - input data (all members of the _KEYS class attribute are required)
14841 - four buffer attributes (in|out_data|text), that represent the
14842 input (to the external script) in text and data structure format,
14843 and the output from it, again in two formats
14844 - the result variables from the script (success, info, result) for
14845 easy usage
14847 """
14848 # pylint: disable=R0902
14849 # lots of instance attributes
14851 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14852 self.cfg = cfg
14853 self.rpc = rpc_runner
14854 # init buffer variables
14855 self.in_text = self.out_text = self.in_data = self.out_data = None
14856 # init all input fields so that pylint is happy
14857 self.mode = mode
14858 self.memory = self.disks = self.disk_template = self.spindle_use = None
14859 self.os = self.tags = self.nics = self.vcpus = None
14860 self.hypervisor = None
14861 self.relocate_from = None
14863 self.instances = None
14864 self.evac_mode = None
14865 self.target_groups = []
14867 self.required_nodes = None
14868 # init result fields
14869 self.success = self.info = self.result = None
14871 try:
14872 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14873 except KeyError:
14874 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14875 " IAllocator" % self.mode)
14877 keyset = [n for (n, _) in keydata]
14879 for key in kwargs:
14880 if key not in keyset:
14881 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14882 " IAllocator" % key)
14883 setattr(self, key, kwargs[key])
14885 for key in keyset:
14886 if key not in kwargs:
14887 raise errors.ProgrammerError("Missing input parameter '%s' to"
14888 " IAllocator" % key)
14889 self._BuildInputData(compat.partial(fn, self), keydata)
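# Illustrative constructor usage (hypothetical values): the keyword arguments must
# match exactly the key list registered in _MODE_DATA for the chosen mode, e.g.
#
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
#                    instances=["inst1.example.com"],
#                    target_groups=["<group-uuid>"])
#
# An unknown or missing key raises errors.ProgrammerError before any cluster data
# is gathered.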
14891 def _ComputeClusterData(self):
14892 """Compute the generic allocator input data.
14894 This is the data that is independent of the actual operation.
14896 """
14897 cfg = self.cfg
14898 cluster_info = cfg.GetClusterInfo()
14900 data = {
14901 "version": constants.IALLOCATOR_VERSION,
14902 "cluster_name": cfg.GetClusterName(),
14903 "cluster_tags": list(cluster_info.GetTags()),
14904 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14905 "ipolicy": cluster_info.ipolicy,
14906 }
14907 ninfo = cfg.GetAllNodesInfo()
14908 iinfo = cfg.GetAllInstancesInfo().values()
14909 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14912 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14914 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14915 hypervisor_name = self.hypervisor
14916 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14917 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14918 else:
14919 hypervisor_name = cluster_info.primary_hypervisor
14921 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14922 [hypervisor_name])
14923 node_iinfo = \
14924 self.rpc.call_all_instances_info(node_list,
14925 cluster_info.enabled_hypervisors)
14927 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14929 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14930 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14931 i_list, config_ndata)
14932 assert len(data["nodes"]) == len(ninfo), \
14933 "Incomplete node data computed"
14935 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14937 self.in_data = data
14939 @staticmethod
14940 def _ComputeNodeGroupData(cfg):
14941 """Compute node groups data.
14943 """
14944 cluster = cfg.GetClusterInfo()
14945 ng = dict((guuid, {
14946 "name": gdata.name,
14947 "alloc_policy": gdata.alloc_policy,
14948 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14949 })
14950 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14952 return ng
14954 @staticmethod
14955 def _ComputeBasicNodeData(cfg, node_cfg):
14956 """Compute global node data.
14959 @returns: a dict of name: (node dict, node config)
14961 """
14962 # fill in static (config-based) values
14963 node_results = dict((ninfo.name, {
14964 "tags": list(ninfo.GetTags()),
14965 "primary_ip": ninfo.primary_ip,
14966 "secondary_ip": ninfo.secondary_ip,
14967 "offline": ninfo.offline,
14968 "drained": ninfo.drained,
14969 "master_candidate": ninfo.master_candidate,
14970 "group": ninfo.group,
14971 "master_capable": ninfo.master_capable,
14972 "vm_capable": ninfo.vm_capable,
14973 "ndparams": cfg.GetNdParams(ninfo),
14974 })
14975 for ninfo in node_cfg.values())
14977 return node_results
14979 @staticmethod
14980 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14981 node_results):
14982 """Compute global node data.
14984 @param node_results: the basic node structures as filled from the config
14986 """
14987 #TODO(dynmem): compute the right data on MAX and MIN memory
14988 # make a copy of the current dict
14989 node_results = dict(node_results)
14990 for nname, nresult in node_data.items():
14991 assert nname in node_results, "Missing basic data for node %s" % nname
14992 ninfo = node_cfg[nname]
14994 if not (ninfo.offline or ninfo.drained):
14995 nresult.Raise("Can't get data for node %s" % nname)
14996 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14997 nname)
14998 remote_info = _MakeLegacyNodeInfo(nresult.payload)
15000 for attr in ["memory_total", "memory_free", "memory_dom0",
15001 "vg_size", "vg_free", "cpu_total"]:
15002 if attr not in remote_info:
15003 raise errors.OpExecError("Node '%s' didn't return attribute"
15004 " '%s'" % (nname, attr))
15005 if not isinstance(remote_info[attr], int):
15006 raise errors.OpExecError("Node '%s' returned invalid value"
15007 " for '%s': %s" %
15008 (nname, attr, remote_info[attr]))
15009 # compute memory used by primary instances
15010 i_p_mem = i_p_up_mem = 0
15011 for iinfo, beinfo in i_list:
15012 if iinfo.primary_node == nname:
15013 i_p_mem += beinfo[constants.BE_MAXMEM]
15014 if iinfo.name not in node_iinfo[nname].payload:
15015 i_used_mem = 0
15016 else:
15017 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
15018 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
15019 remote_info["memory_free"] -= max(0, i_mem_diff)
15021 if iinfo.admin_state == constants.ADMINST_UP:
15022 i_p_up_mem += beinfo[constants.BE_MAXMEM]
15024 # compute memory used by instances
15025 pnr_dyn = {
15026 "total_memory": remote_info["memory_total"],
15027 "reserved_memory": remote_info["memory_dom0"],
15028 "free_memory": remote_info["memory_free"],
15029 "total_disk": remote_info["vg_size"],
15030 "free_disk": remote_info["vg_free"],
15031 "total_cpus": remote_info["cpu_total"],
15032 "i_pri_memory": i_p_mem,
15033 "i_pri_up_memory": i_p_up_mem,
15034 }
15035 pnr_dyn.update(node_results[nname])
15036 node_results[nname] = pnr_dyn
15038 return node_results
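# Illustrative sketch (hypothetical numbers) of one resulting entry for an online,
# vm_capable node, merging the static keys from _ComputeBasicNodeData with the
# dynamic ones computed above:
#
#   "node1.example.com": {"group": "<uuid>", "offline": False, ...,
#                         "total_memory": 32768, "free_memory": 20480,
#                         "reserved_memory": 1024, "total_disk": 512000,
#                         "free_disk": 256000, "total_cpus": 8,
#                         "i_pri_memory": 4096, "i_pri_up_memory": 2048}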
15040 @staticmethod
15041 def _ComputeInstanceData(cluster_info, i_list):
15042 """Compute global instance data.
15044 """
15045 instance_data = {}
15046 for iinfo, beinfo in i_list:
15047 nic_data = []
15048 for nic in iinfo.nics:
15049 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
15050 nic_dict = {
15051 "mac": nic.mac,
15052 "ip": nic.ip,
15053 "mode": filled_params[constants.NIC_MODE],
15054 "link": filled_params[constants.NIC_LINK],
15055 }
15056 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
15057 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
15058 nic_data.append(nic_dict)
15059 pir = {
15060 "tags": list(iinfo.GetTags()),
15061 "admin_state": iinfo.admin_state,
15062 "vcpus": beinfo[constants.BE_VCPUS],
15063 "memory": beinfo[constants.BE_MAXMEM],
15064 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
15065 "os": iinfo.os,
15066 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
15067 "nics": nic_data,
15068 "disks": [{constants.IDISK_SIZE: dsk.size,
15069 constants.IDISK_MODE: dsk.mode}
15070 for dsk in iinfo.disks],
15071 "disk_template": iinfo.disk_template,
15072 "hypervisor": iinfo.hypervisor,
15073 }
15074 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
15075 pir["disks"])
15076 instance_data[iinfo.name] = pir
15078 return instance_data
15080 def _AddNewInstance(self):
15081 """Add new instance data to allocator structure.
15083 This in combination with _ComputeClusterData will create the
15084 correct structure needed as input for the allocator.
15086 The checks for the completeness of the opcode must have already been
15087 done.
15089 """
15090 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
15092 if self.disk_template in constants.DTS_INT_MIRROR:
15093 self.required_nodes = 2
15094 else:
15095 self.required_nodes = 1
15097 request = {
15098 "name": self.name,
15099 "disk_template": self.disk_template,
15100 "tags": self.tags,
15101 "os": self.os,
15102 "vcpus": self.vcpus,
15103 "memory": self.memory,
15104 "spindle_use": self.spindle_use,
15105 "disks": self.disks,
15106 "disk_space_total": disk_space,
15107 "nics": self.nics,
15108 "required_nodes": self.required_nodes,
15109 "hypervisor": self.hypervisor,
15110 }
15112 return request
15114 def _AddRelocateInstance(self):
15115 """Add relocate instance data to allocator structure.
15117 This in combination with _ComputeClusterData will create the
15118 correct structure needed as input for the allocator.
15120 The checks for the completeness of the opcode must have already been
15121 done.
15123 """
15124 instance = self.cfg.GetInstanceInfo(self.name)
15125 if instance is None:
15126 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15127 " IAllocator" % self.name)
15129 if instance.disk_template not in constants.DTS_MIRRORED:
15130 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15131 errors.ECODE_INVAL)
15133 if instance.disk_template in constants.DTS_INT_MIRROR and \
15134 len(instance.secondary_nodes) != 1:
15135 raise errors.OpPrereqError("Instance has not exactly one secondary node",
15136 errors.ECODE_STATE)
15138 self.required_nodes = 1
15139 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15140 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15142 request = {
15143 "name": self.name,
15144 "disk_space_total": disk_space,
15145 "required_nodes": self.required_nodes,
15146 "relocate_from": self.relocate_from,
15147 }
15148 return request
15150 def _AddNodeEvacuate(self):
15151 """Get data for node-evacuate requests.
15153 """
15154 return {
15155 "instances": self.instances,
15156 "evac_mode": self.evac_mode,
15157 }
15159 def _AddChangeGroup(self):
15160 """Get data for group-change requests.
15162 """
15163 return {
15164 "instances": self.instances,
15165 "target_groups": self.target_groups,
15166 }
15168 def _BuildInputData(self, fn, keydata):
15169 """Build input data structures.
15171 """
15172 self._ComputeClusterData()
15174 request = fn()
15175 request["type"] = self.mode
15176 for keyname, keytype in keydata:
15177 if keyname not in request:
15178 raise errors.ProgrammerError("Request parameter %s is missing" %
15179 keyname)
15180 val = request[keyname]
15181 if not keytype(val):
15182 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15183 " validation, value %s, expected"
15184 " type %s" % (keyname, val, keytype))
15185 self.in_data["request"] = request
15187 self.in_text = serializer.Dump(self.in_data)
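# Sketch of the serialized request portion (hypothetical values; self.in_data also
# carries the cluster, nodegroup, node and instance sections built above), here for
# a relocation:
#
#   "request": {"type": constants.IALLOCATOR_MODE_RELOC,
#               "name": "inst1.example.com",
#               "relocate_from": ["node2.example.com"],
#               "disk_space_total": 10240, "required_nodes": 1}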
15189 _STRING_LIST = ht.TListOf(ht.TString)
15190 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15191 # pylint: disable=E1101
15192 # Class '...' has no 'OP_ID' member
15193 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15194 opcodes.OpInstanceMigrate.OP_ID,
15195 opcodes.OpInstanceReplaceDisks.OP_ID])
15196 })))
15198 _NEVAC_MOVED = \
15199 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15200 ht.TItems([ht.TNonEmptyString,
15201 ht.TNonEmptyString,
15202 ht.TListOf(ht.TNonEmptyString),
15203 ])))
15204 _NEVAC_FAILED = \
15205 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15206 ht.TItems([ht.TNonEmptyString,
15207 ht.TMaybeString,
15208 ])))
15209 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15210 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
15212 _MODE_DATA = {
15213 constants.IALLOCATOR_MODE_ALLOC:
15214 (_AddNewInstance,
15215 [
15216 ("name", ht.TString),
15217 ("memory", ht.TInt),
15218 ("spindle_use", ht.TInt),
15219 ("disks", ht.TListOf(ht.TDict)),
15220 ("disk_template", ht.TString),
15221 ("os", ht.TString),
15222 ("tags", _STRING_LIST),
15223 ("nics", ht.TListOf(ht.TDict)),
15224 ("vcpus", ht.TInt),
15225 ("hypervisor", ht.TString),
15226 ], ht.TList),
15227 constants.IALLOCATOR_MODE_RELOC:
15228 (_AddRelocateInstance,
15229 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15230 ht.TList),
15231 constants.IALLOCATOR_MODE_NODE_EVAC:
15232 (_AddNodeEvacuate, [
15233 ("instances", _STRING_LIST),
15234 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15235 ], _NEVAC_RESULT),
15236 constants.IALLOCATOR_MODE_CHG_GROUP:
15237 (_AddChangeGroup, [
15238 ("instances", _STRING_LIST),
15239 ("target_groups", _STRING_LIST),
15240 ], _NEVAC_RESULT),
15241 }
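# Each _MODE_DATA entry is a (request-builder, [(key, validator), ...], result-check)
# triple: __init__ checks **kwargs against the key list, the builder assembles the
# per-mode request, and _ValidateResult applies the result-check to the script output.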
15243 def Run(self, name, validate=True, call_fn=None):
15244 """Run an instance allocator and return the results.
15246 """
15247 if call_fn is None:
15248 call_fn = self.rpc.call_iallocator_runner
15250 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15251 result.Raise("Failure while running the iallocator script")
15253 self.out_text = result.payload
15254 if validate:
15255 self._ValidateResult()
15257 def _ValidateResult(self):
15258 """Process the allocator results.
15260 This will process and if successful save the result in
15261 self.out_data and the other parameters.
15263 """
15264 try:
15265 rdict = serializer.Load(self.out_text)
15266 except Exception, err:
15267 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15269 if not isinstance(rdict, dict):
15270 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15272 # TODO: remove backwards compatibility in later versions
15273 if "nodes" in rdict and "result" not in rdict:
15274 rdict["result"] = rdict["nodes"]
15275 del rdict["nodes"]
15277 for key in "success", "info", "result":
15278 if key not in rdict:
15279 raise errors.OpExecError("Can't parse iallocator results:"
15280 " missing key '%s'" % key)
15281 setattr(self, key, rdict[key])
15283 if not self._result_check(self.result):
15284 raise errors.OpExecError("Iallocator returned invalid result,"
15285 " expected %s, got %s" %
15286 (self._result_check, self.result),
15287 errors.ECODE_INVAL)
15289 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15290 assert self.relocate_from is not None
15291 assert self.required_nodes == 1
15293 node2group = dict((name, ndata["group"])
15294 for (name, ndata) in self.in_data["nodes"].items())
15296 fn = compat.partial(self._NodesToGroups, node2group,
15297 self.in_data["nodegroups"])
15299 instance = self.cfg.GetInstanceInfo(self.name)
15300 request_groups = fn(self.relocate_from + [instance.primary_node])
15301 result_groups = fn(rdict["result"] + [instance.primary_node])
15303 if self.success and not set(result_groups).issubset(request_groups):
15304 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15305 " differ from original groups (%s)" %
15306 (utils.CommaJoin(result_groups),
15307 utils.CommaJoin(request_groups)))
15309 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15310 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15312 self.out_data = rdict
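# Illustrative sketch (hypothetical) of a well-formed reply accepted above; the
# "result" value must additionally satisfy the mode's result-check validator:
#
#   {"success": true, "info": "allocation successful", "result": ["node1", "node2"]}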
15314 @staticmethod
15315 def _NodesToGroups(node2group, groups, nodes):
15316 """Returns a list of unique group names for a list of nodes.
15318 @type node2group: dict
15319 @param node2group: Map from node name to group UUID
15320 @type groups: dict
15321 @param groups: Group information
15322 @type nodes: list of strings
15323 @param nodes: Node names
15325 """
15326 result = set()
15328 for node in nodes:
15329 try:
15330 group_uuid = node2group[node]
15331 except KeyError:
15332 # Ignore unknown node
15333 pass
15334 else:
15335 try:
15336 group = groups[group_uuid]
15337 except KeyError:
15338 # Can't find group, let's use UUID
15339 group_name = group_uuid
15340 else:
15341 group_name = group["name"]
15343 result.add(group_name)
15345 return sorted(result)
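# Worked sketch (hypothetical data) of the helper above:
#
#   node2group = {"node1": "uuid-a", "node2": "uuid-a", "node3": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}}          # "uuid-b" has no group data
#   _NodesToGroups(node2group, groups, ["node1", "node2", "node3", "unknown"])
#   # -> ["default", "uuid-b"]  (duplicates collapsed, unknown node ignored,
#   #                            missing group falls back to its UUID)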
15348 class LUTestAllocator(NoHooksLU):
15349 """Run allocator tests.
15351 This LU runs the allocator tests
15353 """
15354 def CheckPrereq(self):
15355 """Check prerequisites.
15357 This checks the opcode parameters depending on the direction and mode test.
15359 """
15360 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15361 for attr in ["memory", "disks", "disk_template",
15362 "os", "tags", "nics", "vcpus"]:
15363 if not hasattr(self.op, attr):
15364 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15365 attr, errors.ECODE_INVAL)
15366 iname = self.cfg.ExpandInstanceName(self.op.name)
15367 if iname is not None:
15368 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15369 iname, errors.ECODE_EXISTS)
15370 if not isinstance(self.op.nics, list):
15371 raise errors.OpPrereqError("Invalid parameter 'nics'",
15372 errors.ECODE_INVAL)
15373 if not isinstance(self.op.disks, list):
15374 raise errors.OpPrereqError("Invalid parameter 'disks'",
15375 errors.ECODE_INVAL)
15376 for row in self.op.disks:
15377 if (not isinstance(row, dict) or
15378 constants.IDISK_SIZE not in row or
15379 not isinstance(row[constants.IDISK_SIZE], int) or
15380 constants.IDISK_MODE not in row or
15381 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15382 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15383 " parameter", errors.ECODE_INVAL)
15384 if self.op.hypervisor is None:
15385 self.op.hypervisor = self.cfg.GetHypervisorType()
15386 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15387 fname = _ExpandInstanceName(self.cfg, self.op.name)
15388 self.op.name = fname
15389 self.relocate_from = \
15390 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15391 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15392 constants.IALLOCATOR_MODE_NODE_EVAC):
15393 if not self.op.instances:
15394 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15395 self.op.instances = _GetWantedInstances(self, self.op.instances)
15396 else:
15397 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15398 self.op.mode, errors.ECODE_INVAL)
15400 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15401 if self.op.allocator is None:
15402 raise errors.OpPrereqError("Missing allocator name",
15403 errors.ECODE_INVAL)
15404 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15405 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15406 self.op.direction, errors.ECODE_INVAL)
15408 def Exec(self, feedback_fn):
15409 """Run the allocator test.
15411 """
15412 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15413 ial = IAllocator(self.cfg, self.rpc,
15414 mode=self.op.mode,
15415 name=self.op.name,
15416 memory=self.op.memory,
15417 disks=self.op.disks,
15418 disk_template=self.op.disk_template,
15419 os=self.op.os,
15420 tags=self.op.tags,
15421 nics=self.op.nics,
15422 vcpus=self.op.vcpus,
15423 hypervisor=self.op.hypervisor,
15424 spindle_use=self.op.spindle_use)
15425 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15426 ial = IAllocator(self.cfg, self.rpc,
15427 mode=self.op.mode,
15428 name=self.op.name,
15429 relocate_from=list(self.relocate_from))
15431 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15432 ial = IAllocator(self.cfg, self.rpc,
15433 mode=self.op.mode,
15434 instances=self.op.instances,
15435 target_groups=self.op.target_groups)
15436 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15437 ial = IAllocator(self.cfg, self.rpc,
15438 mode=self.op.mode,
15439 instances=self.op.instances,
15440 evac_mode=self.op.evac_mode)
15441 else:
15442 raise errors.ProgrammerError("Uncaught mode %s in"
15443 " LUTestAllocator.Exec", self.op.mode)
15445 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15446 result = ial.in_text
15447 else:
15448 ial.Run(self.op.allocator, validate=False)
15449 result = ial.out_text
15450 return result
15453 #: Query type implementations
15454 _QUERY_IMPL = {
15455 constants.QR_CLUSTER: _ClusterQuery,
15456 constants.QR_INSTANCE: _InstanceQuery,
15457 constants.QR_NODE: _NodeQuery,
15458 constants.QR_GROUP: _GroupQuery,
15459 constants.QR_OS: _OsQuery,
15460 constants.QR_EXPORT: _ExportQuery,
15461 }
15463 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15466 def _GetQueryImplementation(name):
15467 """Returns the implementation for a query type.
15469 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15471 """
15472 try:
15473 return _QUERY_IMPL[name]
15474 except KeyError:
15475 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15476 errors.ECODE_INVAL)
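# Illustrative usage sketch (hypothetical call site): resolving the implementation
# for an opcode-visible query resource.
#
#   impl = _GetQueryImplementation(constants.QR_NODE)      # -> _NodeQuery
#   _GetQueryImplementation("no-such-resource")            # raises OpPrereqError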