4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
87 result.
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
130 validity.
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separately is better because:
176 - ExpandNames is left as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possibly
178 waited for them)
180 The function is allowed to change the self.op attribute so that
181 later methods need not worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
196 as values; specifically:
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
223 }
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
227 }
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
234 # time.
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
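# Illustrative sketch (hypothetical LU, not taken from this module): a minimal
# ExpandNames that locks one instance exclusively, shares the node-level locks
# and defers the node list to DeclareLocks, following the rules above.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#     self.share_locks[locking.LEVEL_NODE] = 1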
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
264 allowed.
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
282 """Execute the LU.
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
286 code, or expected.
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. If there are no nodes to return,
318 an empty list should be used (not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the unused argument and "could
345 # be a function" warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
356 before.
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383 It should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we really have been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
401 wanted_nodes = []
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
405 if not primary_only:
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
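# Illustrative sketch (hypothetical LU that set recalculate_locks in its
# ExpandNames): DeclareLocks then resolves the node locks via
# _LockInstancesNodes once the instance locks are already held.
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes(primary_only=True)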
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of its children!)
608 @param cfg: The config object
609 @return: The annotated disk copies
610 @see L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
618 cur_group_uuid):
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
644 primary_only=False):
645 """Checks if the owned node groups are still correct for an instance.
647 @type cfg: L{config.ConfigWriter}
648 @param cfg: The cluster configuration
649 @type instance_name: string
650 @param instance_name: Instance name
651 @type owned_groups: set or frozenset
652 @param owned_groups: List of currently owned node groups
653 @type primary_only: boolean
654 @param primary_only: Whether to check node groups for only the primary node
657 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
659 if not owned_groups.issuperset(inst_groups):
660 raise errors.OpPrereqError("Instance %s's node groups changed since"
661 " locks were acquired, current groups are"
662 " are '%s', owning groups '%s'; retry the"
665 utils.CommaJoin(inst_groups),
666 utils.CommaJoin(owned_groups)),
672 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
673 """Checks if the instances in a node group are still correct.
675 @type cfg: L{config.ConfigWriter}
676 @param cfg: The cluster configuration
677 @type group_uuid: string
678 @param group_uuid: Node group UUID
679 @type owned_instances: set or frozenset
680 @param owned_instances: List of currently owned instances
683 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
684 if owned_instances != wanted_instances:
685 raise errors.OpPrereqError("Instances in node group '%s' changed since"
686 " locks were acquired, wanted '%s', have '%s';"
687 " retry the operation" %
689 utils.CommaJoin(wanted_instances),
690 utils.CommaJoin(owned_instances)),
693 return wanted_instances
696 def _SupportsOob(cfg, node):
697 """Tells if node supports OOB.
699 @type cfg: L{config.ConfigWriter}
700 @param cfg: The cluster configuration
701 @type node: L{objects.Node}
702 @param node: The node
703 @return: The OOB script if supported or an empty string otherwise
706 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
709 def _GetWantedNodes(lu, nodes):
710 """Returns list of checked and expanded node names.
712 @type lu: L{LogicalUnit}
713 @param lu: the logical unit on whose behalf we execute
715 @param nodes: list of node names or None for all nodes
717 @return: the list of nodes, sorted
718 @raise errors.ProgrammerError: if the nodes parameter is wrong type
721 if nodes:
722 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
724 return utils.NiceSort(lu.cfg.GetNodeList())
727 def _GetWantedInstances(lu, instances):
728 """Returns list of checked and expanded instance names.
730 @type lu: L{LogicalUnit}
731 @param lu: the logical unit on whose behalf we execute
732 @type instances: list
733 @param instances: list of instance names or None for all instances
735 @return: the list of instances, sorted
736 @raise errors.OpPrereqError: if the instances parameter is wrong type
737 @raise errors.OpPrereqError: if any of the passed instances is not found
740 if instances:
741 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
742 else:
743 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
745 return wanted
747 def _GetUpdatedParams(old_params, update_dict,
748 use_default=True, use_none=False):
749 """Return the new version of a parameter dictionary.
751 @type old_params: dict
752 @param old_params: old parameters
753 @type update_dict: dict
754 @param update_dict: dict containing new parameter values, or
755 constants.VALUE_DEFAULT to reset the parameter to its default
757 @type use_default: boolean
758 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
759 values as 'to be deleted' values
760 @type use_none: boolean
761 @param use_none: whether to recognise C{None} values as 'to be
762 deleted' values
764 @return: the new parameter dictionary
767 params_copy = copy.deepcopy(old_params)
768 for key, val in update_dict.iteritems():
769 if ((use_default and val == constants.VALUE_DEFAULT) or
770 (use_none and val is None)):
771 try:
772 del params_copy[key]
773 except KeyError:
774 pass
775 else:
776 params_copy[key] = val
777 return params_copy
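# Illustrative example (hypothetical dicts): with use_default=True, a value of
# constants.VALUE_DEFAULT removes the key from the result, while other keys
# are updated or added.
#
#   _GetUpdatedParams({"mem": 128, "vcpus": 2},
#                     {"mem": constants.VALUE_DEFAULT, "boot": "disk"})
#   results in {"vcpus": 2, "boot": "disk"}: "mem" is deleted, "boot" is added.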
780 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
781 """Return the new version of a instance policy.
783 @param group_policy: whether this policy applies to a group and thus
784 we should support removal of policy entries
787 use_none = use_default = group_policy
788 ipolicy = copy.deepcopy(old_ipolicy)
789 for key, value in new_ipolicy.items():
790 if key not in constants.IPOLICY_ALL_KEYS:
791 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
793 if key in constants.IPOLICY_ISPECS:
794 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
795 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
796 use_none=use_none,
797 use_default=use_default)
798 else:
799 if (not value or value == [constants.VALUE_DEFAULT] or
800 value == constants.VALUE_DEFAULT):
801 if group_policy:
802 del ipolicy[key]
803 else:
804 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
805 " on the cluster" % key,
806 errors.ECODE_INVAL)
807 else:
808 if key in constants.IPOLICY_PARAMETERS:
809 # FIXME: we assume all such values are float
810 try:
811 ipolicy[key] = float(value)
812 except (TypeError, ValueError), err:
813 raise errors.OpPrereqError("Invalid value for attribute"
814 " '%s': '%s', error: %s" %
815 (key, value, err), errors.ECODE_INVAL)
816 else:
817 # FIXME: we assume all others are lists; this should be redone
819 ipolicy[key] = list(value)
820 try:
821 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
822 except errors.ConfigurationError, err:
823 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
824 errors.ECODE_INVAL)
826 return ipolicy
828 def _UpdateAndVerifySubDict(base, updates, type_check):
829 """Updates and verifies a dict with sub dicts of the same type.
831 @param base: The dict with the old data
832 @param updates: The dict with the new data
833 @param type_check: Dict suitable to ForceDictType to verify correct types
834 @returns: A new dict with updated and verified values
837 def fn(old, value):
838 new = _GetUpdatedParams(old, value)
839 utils.ForceDictType(new, type_check)
840 return new
842 ret = copy.deepcopy(base)
843 ret.update(dict((key, fn(base.get(key, {}), value))
844 for key, value in updates.items()))
848 def _MergeAndVerifyHvState(op_input, obj_input):
849 """Combines the hv state from an opcode with the one of the object
851 @param op_input: The input dict from the opcode
852 @param obj_input: The input dict from the objects
853 @return: The verified and updated dict
857 invalid_hvs = set(op_input) - constants.HYPER_TYPES
859 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
860 " %s" % utils.CommaJoin(invalid_hvs),
862 if obj_input is None:
864 type_check = constants.HVSTS_PARAMETER_TYPES
865 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
870 def _MergeAndVerifyDiskState(op_input, obj_input):
871 """Combines the disk state from an opcode with the one of the object
873 @param op_input: The input dict from the opcode
874 @param obj_input: The input dict from the objects
875 @return: The verified and updated dict
878 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
880 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
881 utils.CommaJoin(invalid_dst),
883 type_check = constants.DSS_PARAMETER_TYPES
884 if obj_input is None:
886 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
888 for key, value in op_input.items())
893 def _ReleaseLocks(lu, level, names=None, keep=None):
894 """Releases locks owned by an LU.
896 @type lu: L{LogicalUnit}
897 @param level: Lock level
898 @type names: list or None
899 @param names: Names of locks to release
900 @type keep: list or None
901 @param keep: Names of locks to retain
904 assert not (keep is not None and names is not None), \
905 "Only one of the 'names' and the 'keep' parameters can be given"
907 if names is not None:
908 should_release = names.__contains__
909 elif keep is not None:
910 should_release = lambda name: name not in keep
911 else:
912 should_release = None
914 owned = lu.owned_locks(level)
916 # Not owning any lock at this level, do nothing
923 # Determine which locks to release
925 if should_release(name):
930 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
932 # Release just some locks
933 lu.glm.release(level, names=release)
935 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
938 lu.glm.release(level)
940 assert not lu.glm.is_owned(level), "No locks should be owned"
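# Illustrative example (hypothetical attribute names): releasing the node
# locks an LU no longer needs while keeping those for the primary and the new
# secondary node.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.instance.primary_node, self.op.remote_node])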
943 def _MapInstanceDisksToNodes(instances):
944 """Creates a map from (node, volume) to instance name.
946 @type instances: list of L{objects.Instance}
947 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
950 return dict(((node, vol), inst.name)
951 for inst in instances
952 for (node, vols) in inst.MapLVsByNode().items()
953 for vol in vols)
956 def _RunPostHook(lu, node_name):
957 """Runs the post-hook for an opcode on a single node.
960 hm = lu.proc.BuildHooksManager(lu)
961 try:
962 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
963 except:
964 # pylint: disable=W0702
965 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
968 def _CheckOutputFields(static, dynamic, selected):
969 """Checks whether all selected fields are valid.
971 @type static: L{utils.FieldSet}
972 @param static: static fields set
973 @type dynamic: L{utils.FieldSet}
974 @param dynamic: dynamic fields set
981 delta = f.NonMatching(selected)
982 if delta:
983 raise errors.OpPrereqError("Unknown output fields selected: %s"
984 % ",".join(delta), errors.ECODE_INVAL)
987 def _CheckGlobalHvParams(params):
988 """Validates that given hypervisor params are not global ones.
990 This will ensure that instances don't get customised versions of
991 global parameters.
994 used_globals = constants.HVC_GLOBALS.intersection(params)
995 if used_globals:
996 msg = ("The following hypervisor parameters are global and cannot"
997 " be customized at instance level, please modify them at"
998 " cluster level: %s" % utils.CommaJoin(used_globals))
999 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1002 def _CheckNodeOnline(lu, node, msg=None):
1003 """Ensure that a given node is online.
1005 @param lu: the LU on behalf of which we make the check
1006 @param node: the node to check
1007 @param msg: if passed, should be a message to replace the default one
1008 @raise errors.OpPrereqError: if the node is offline
1012 msg = "Can't use offline node"
1013 if lu.cfg.GetNodeInfo(node).offline:
1014 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1017 def _CheckNodeNotDrained(lu, node):
1018 """Ensure that a given node is not drained.
1020 @param lu: the LU on behalf of which we make the check
1021 @param node: the node to check
1022 @raise errors.OpPrereqError: if the node is drained
1025 if lu.cfg.GetNodeInfo(node).drained:
1026 raise errors.OpPrereqError("Can't use drained node %s" % node,
1030 def _CheckNodeVmCapable(lu, node):
1031 """Ensure that a given node is vm capable.
1033 @param lu: the LU on behalf of which we make the check
1034 @param node: the node to check
1035 @raise errors.OpPrereqError: if the node is not vm capable
1038 if not lu.cfg.GetNodeInfo(node).vm_capable:
1039 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1043 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1044 """Ensure that a node supports a given OS.
1046 @param lu: the LU on behalf of which we make the check
1047 @param node: the node to check
1048 @param os_name: the OS to query about
1049 @param force_variant: whether to ignore variant errors
1050 @raise errors.OpPrereqError: if the node is not supporting the OS
1053 result = lu.rpc.call_os_get(node, os_name)
1054 result.Raise("OS '%s' not in supported OS list for node %s" %
1056 prereq=True, ecode=errors.ECODE_INVAL)
1057 if not force_variant:
1058 _CheckOSVariant(result.payload, os_name)
1061 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1062 """Ensure that a node has the given secondary ip.
1064 @type lu: L{LogicalUnit}
1065 @param lu: the LU on behalf of which we make the check
1067 @param node: the node to check
1068 @type secondary_ip: string
1069 @param secondary_ip: the ip to check
1070 @type prereq: boolean
1071 @param prereq: whether to throw a prerequisite or an execute error
1072 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1073 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1076 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1077 result.Raise("Failure checking secondary ip on node %s" % node,
1078 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1079 if not result.payload:
1080 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1081 " please fix and re-run this command" % secondary_ip)
1083 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1085 raise errors.OpExecError(msg)
1088 def _GetClusterDomainSecret():
1089 """Reads the cluster domain secret.
1092 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1096 def _CheckInstanceState(lu, instance, req_states, msg=None):
1097 """Ensure that an instance is in one of the required states.
1099 @param lu: the LU on behalf of which we make the check
1100 @param instance: the instance to check
1101 @param msg: if passed, should be a message to replace the default one
1102 @raise errors.OpPrereqError: if the instance is not in the required state
1106 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1107 if instance.admin_state not in req_states:
1108 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1109 (instance.name, instance.admin_state, msg),
1112 if constants.ADMINST_UP not in req_states:
1113 pnode = instance.primary_node
1114 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1115 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1116 prereq=True, ecode=errors.ECODE_ENVIRON)
1118 if instance.name in ins_l.payload:
1119 raise errors.OpPrereqError("Instance %s is running, %s" %
1120 (instance.name, msg), errors.ECODE_STATE)
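# Illustrative example (hypothetical context): requiring a stopped instance in
# CheckPrereq before an operation that cannot run while the instance is up.
#
#   _CheckInstanceState(self, instance, INSTANCE_DOWN,
#                       msg="cannot recreate disks of a running instance")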
1123 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1124 """Computes if value is in the desired range.
1126 @param name: name of the parameter for which we perform the check
1127 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1129 @param ipolicy: dictionary containing min, max and std values
1130 @param value: actual value that we want to use
1131 @return: None or element not meeting the criteria
1135 if value in [None, constants.VALUE_AUTO]:
1136 return None
1137 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1138 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1139 if value > max_v or min_v > value:
1140 if qualifier:
1141 fqn = "%s/%s" % (name, qualifier)
1142 else:
1143 fqn = name
1144 return ("%s value %s is not in range [%s, %s]" %
1145 (fqn, value, min_v, max_v))
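# Illustrative example (hypothetical ipolicy fragment): the function returns
# None when the value lies inside [min, max] and an error string otherwise.
#
#   pol = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#          constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096}}
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, pol, 512)   # -> None
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, pol, 8192)  # -> "... is not in range [128, 4096]"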
1149 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1150 nic_count, disk_sizes, spindle_use,
1151 _compute_fn=_ComputeMinMaxSpec):
1152 """Verifies ipolicy against provided specs.
1155 @param ipolicy: The ipolicy
1157 @param mem_size: The memory size
1158 @type cpu_count: int
1159 @param cpu_count: Used cpu cores
1160 @type disk_count: int
1161 @param disk_count: Number of disks used
1162 @type nic_count: int
1163 @param nic_count: Number of nics used
1164 @type disk_sizes: list of ints
1165 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1166 @type spindle_use: int
1167 @param spindle_use: The number of spindles this instance uses
1168 @param _compute_fn: The compute function (unittest only)
1169 @return: A list of violations, or an empty list if no violations are found
1172 assert disk_count == len(disk_sizes)
1174 test_settings = [
1175 (constants.ISPEC_MEM_SIZE, "", mem_size),
1176 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1177 (constants.ISPEC_DISK_COUNT, "", disk_count),
1178 (constants.ISPEC_NIC_COUNT, "", nic_count),
1179 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1180 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1181 for idx, d in enumerate(disk_sizes)]
1183 return filter(None,
1184 (_compute_fn(name, qualifier, ipolicy, value)
1185 for (name, qualifier, value) in test_settings))
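# Illustrative example (hypothetical spec): verifying a spec of 2 vCPUs,
# 1024 MB of memory, one 10 GB disk, one NIC and one spindle; an empty result
# means the spec satisfies the policy.
#
#   violations = _ComputeIPolicySpecViolation(ipolicy, 1024, 2, 1, 1,
#                                             [10240], 1)
#   if violations:
#     raise errors.OpPrereqError("Spec violates the instance policy: %s" %
#                                utils.CommaJoin(violations),
#                                errors.ECODE_INVAL)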
1188 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1189 _compute_fn=_ComputeIPolicySpecViolation):
1190 """Compute if instance meets the specs of ipolicy.
1193 @param ipolicy: The ipolicy to verify against
1194 @type instance: L{objects.Instance}
1195 @param instance: The instance to verify
1196 @param _compute_fn: The function to verify ipolicy (unittest only)
1197 @see: L{_ComputeIPolicySpecViolation}
1200 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1201 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1202 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1203 disk_count = len(instance.disks)
1204 disk_sizes = [disk.size for disk in instance.disks]
1205 nic_count = len(instance.nics)
1207 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1208 disk_sizes, spindle_use)
1211 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1212 _compute_fn=_ComputeIPolicySpecViolation):
1213 """Compute if instance specs meets the specs of ipolicy.
1216 @param ipolicy: The ipolicy to verify against
1217 @type instance_spec: dict
1218 @param instance_spec: The instance spec to verify
1219 @param _compute_fn: The function to verify ipolicy (unittest only)
1220 @see: L{_ComputeIPolicySpecViolation}
1223 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1224 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1225 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1226 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1227 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1228 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1230 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1231 disk_sizes, spindle_use)
1234 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1235 target_group,
1236 _compute_fn=_ComputeIPolicyInstanceViolation):
1237 """Compute if instance meets the specs of the new target group.
1239 @param ipolicy: The ipolicy to verify
1240 @param instance: The instance object to verify
1241 @param current_group: The current group of the instance
1242 @param target_group: The new group of the instance
1243 @param _compute_fn: The function to verify ipolicy (unittest only)
1244 @see: L{_ComputeIPolicySpecViolation}
1247 if current_group == target_group:
1248 return []
1249 else:
1250 return _compute_fn(ipolicy, instance)
1253 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1254 _compute_fn=_ComputeIPolicyNodeViolation):
1255 """Checks that the target node is correct in terms of instance policy.
1257 @param ipolicy: The ipolicy to verify
1258 @param instance: The instance object to verify
1259 @param node: The new node to relocate
1260 @param ignore: Ignore violations of the ipolicy
1261 @param _compute_fn: The function to verify ipolicy (unittest only)
1262 @see: L{_ComputeIPolicySpecViolation}
1265 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1266 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1268 if res:
1269 msg = ("Instance does not meet target node group's (%s) instance"
1270 " policy: %s") % (node.group, utils.CommaJoin(res))
1271 if ignore:
1272 lu.LogWarning(msg)
1273 else:
1274 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1277 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1278 """Computes a set of any instances that would violate the new ipolicy.
1280 @param old_ipolicy: The current (still in-place) ipolicy
1281 @param new_ipolicy: The new (to become) ipolicy
1282 @param instances: List of instances to verify
1283 @return: A list of instances which violate the new ipolicy but
1284 did not violate the old one
1287 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1288 _ComputeViolatingInstances(old_ipolicy, instances))
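# Illustrative example (hypothetical caller): warning about instances that
# only the tightened policy would reject.
#
#   new_violations = _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy,
#                                                  instances)
#   if new_violations:
#     self.LogWarning("Instances violating the new policy: %s",
#                     utils.CommaJoin(new_violations))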
1291 def _ExpandItemName(fn, name, kind):
1292 """Expand an item name.
1294 @param fn: the function to use for expansion
1295 @param name: requested item name
1296 @param kind: text description ('Node' or 'Instance')
1297 @return: the resolved (full) name
1298 @raise errors.OpPrereqError: if the item is not found
1301 full_name = fn(name)
1302 if full_name is None:
1303 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1308 def _ExpandNodeName(cfg, name):
1309 """Wrapper over L{_ExpandItemName} for nodes."""
1310 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1313 def _ExpandInstanceName(cfg, name):
1314 """Wrapper over L{_ExpandItemName} for instance."""
1315 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
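# Illustrative example (hypothetical short name): canonicalising a
# user-supplied instance name early in CheckArguments or ExpandNames.
#
#   self.op.instance_name = _ExpandInstanceName(self.cfg,
#                                               self.op.instance_name)
#   # raises errors.OpPrereqError("Instance '...' not known") if unresolvable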
1318 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1319 minmem, maxmem, vcpus, nics, disk_template, disks,
1320 bep, hvp, hypervisor_name, tags):
1321 """Builds instance related env variables for hooks
1323 This builds the hook environment from individual variables.
1326 @param name: the name of the instance
1327 @type primary_node: string
1328 @param primary_node: the name of the instance's primary node
1329 @type secondary_nodes: list
1330 @param secondary_nodes: list of secondary nodes as strings
1331 @type os_type: string
1332 @param os_type: the name of the instance's OS
1333 @type status: string
1334 @param status: the desired status of the instance
1335 @type minmem: string
1336 @param minmem: the minimum memory size of the instance
1337 @type maxmem: string
1338 @param maxmem: the maximum memory size of the instance
1340 @param vcpus: the count of VCPUs the instance has
1342 @param nics: list of tuples (ip, mac, mode, link) representing
1343 the NICs the instance has
1344 @type disk_template: string
1345 @param disk_template: the disk template of the instance
1347 @param disks: the list of (size, mode) pairs
1349 @param bep: the backend parameters for the instance
1351 @param hvp: the hypervisor parameters for the instance
1352 @type hypervisor_name: string
1353 @param hypervisor_name: the hypervisor for the instance
1355 @param tags: list of instance tags as strings
1357 @return: the hook environment for this instance
1362 "INSTANCE_NAME": name,
1363 "INSTANCE_PRIMARY": primary_node,
1364 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1365 "INSTANCE_OS_TYPE": os_type,
1366 "INSTANCE_STATUS": status,
1367 "INSTANCE_MINMEM": minmem,
1368 "INSTANCE_MAXMEM": maxmem,
1369 # TODO(2.7) remove deprecated "memory" value
1370 "INSTANCE_MEMORY": maxmem,
1371 "INSTANCE_VCPUS": vcpus,
1372 "INSTANCE_DISK_TEMPLATE": disk_template,
1373 "INSTANCE_HYPERVISOR": hypervisor_name,
1376 nic_count = len(nics)
1377 for idx, (ip, mac, mode, link) in enumerate(nics):
1380 env["INSTANCE_NIC%d_IP" % idx] = ip
1381 env["INSTANCE_NIC%d_MAC" % idx] = mac
1382 env["INSTANCE_NIC%d_MODE" % idx] = mode
1383 env["INSTANCE_NIC%d_LINK" % idx] = link
1384 if mode == constants.NIC_MODE_BRIDGED:
1385 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1389 env["INSTANCE_NIC_COUNT"] = nic_count
1391 if disks:
1392 disk_count = len(disks)
1393 for idx, (size, mode) in enumerate(disks):
1394 env["INSTANCE_DISK%d_SIZE" % idx] = size
1395 env["INSTANCE_DISK%d_MODE" % idx] = mode
1399 env["INSTANCE_DISK_COUNT"] = disk_count
1404 env["INSTANCE_TAGS"] = " ".join(tags)
1406 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1407 for key, value in source.items():
1408 env["INSTANCE_%s_%s" % (kind, key)] = value
1413 def _NICListToTuple(lu, nics):
1414 """Build a list of nic information tuples.
1416 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1417 value in LUInstanceQueryData.
1419 @type lu: L{LogicalUnit}
1420 @param lu: the logical unit on whose behalf we execute
1421 @type nics: list of L{objects.NIC}
1422 @param nics: list of nics to convert to hooks tuples
1425 hooks_nics = []
1426 cluster = lu.cfg.GetClusterInfo()
1427 for nic in nics:
1428 ip = nic.ip
1429 mac = nic.mac
1430 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1431 mode = filled_params[constants.NIC_MODE]
1432 link = filled_params[constants.NIC_LINK]
1433 hooks_nics.append((ip, mac, mode, link))
1434 return hooks_nics
1437 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1438 """Builds instance related env variables for hooks from an object.
1440 @type lu: L{LogicalUnit}
1441 @param lu: the logical unit on whose behalf we execute
1442 @type instance: L{objects.Instance}
1443 @param instance: the instance for which we should build the
1445 @type override: dict
1446 @param override: dictionary with key/values that will override the
1447 computed values
1449 @return: the hook environment dictionary
1452 cluster = lu.cfg.GetClusterInfo()
1453 bep = cluster.FillBE(instance)
1454 hvp = cluster.FillHV(instance)
1456 "name": instance.name,
1457 "primary_node": instance.primary_node,
1458 "secondary_nodes": instance.secondary_nodes,
1459 "os_type": instance.os,
1460 "status": instance.admin_state,
1461 "maxmem": bep[constants.BE_MAXMEM],
1462 "minmem": bep[constants.BE_MINMEM],
1463 "vcpus": bep[constants.BE_VCPUS],
1464 "nics": _NICListToTuple(lu, instance.nics),
1465 "disk_template": instance.disk_template,
1466 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1469 "hypervisor_name": instance.hypervisor,
1470 "tags": instance.tags,
1473 args.update(override)
1474 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1477 def _AdjustCandidatePool(lu, exceptions):
1478 """Adjust the candidate pool after node operations.
1481 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1482 if mod_list:
1483 lu.LogInfo("Promoted nodes to master candidate role: %s",
1484 utils.CommaJoin(node.name for node in mod_list))
1485 for name in mod_list:
1486 lu.context.ReaddNode(name)
1487 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1488 if mc_now > mc_max:
1489 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1490 (mc_now, mc_max))
1493 def _DecideSelfPromotion(lu, exceptions=None):
1494 """Decide whether I should promote myself as a master candidate.
1497 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1498 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1499 # the new node will increase mc_max with one, so:
1500 mc_should = min(mc_should + 1, cp_size)
1501 return mc_now < mc_should
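# Illustrative worked example (hypothetical numbers): with a candidate pool
# size of 10, 4 current candidates and 5 desired, the node being added raises
# the target to min(5 + 1, 10) = 6; since 4 < 6, the new node promotes itself.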
1504 def _CalculateGroupIPolicy(cluster, group):
1505 """Calculate instance policy for group.
1508 return cluster.SimpleFillIPolicy(group.ipolicy)
1511 def _ComputeViolatingInstances(ipolicy, instances):
1512 """Computes a set of instances who violates given ipolicy.
1514 @param ipolicy: The ipolicy to verify
1515 @type instances: object.Instance
1516 @param instances: List of instances to verify
1517 @return: A frozenset of instance names violating the ipolicy
1520 return frozenset([inst.name for inst in instances
1521 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1524 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1525 """Check that the brigdes needed by a list of nics exist.
1528 cluster = lu.cfg.GetClusterInfo()
1529 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1530 brlist = [params[constants.NIC_LINK] for params in paramslist
1531 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1532 if brlist:
1533 result = lu.rpc.call_bridges_exist(target_node, brlist)
1534 result.Raise("Error checking bridges on destination node '%s'" %
1535 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1538 def _CheckInstanceBridgesExist(lu, instance, node=None):
1539 """Check that the brigdes needed by an instance exist.
1543 node = instance.primary_node
1544 _CheckNicsBridgesExist(lu, instance.nics, node)
1547 def _CheckOSVariant(os_obj, name):
1548 """Check whether an OS name conforms to the os variants specification.
1550 @type os_obj: L{objects.OS}
1551 @param os_obj: OS object to check
1553 @param name: OS name passed by the user, to check for validity
1556 variant = objects.OS.GetVariant(name)
1557 if not os_obj.supported_variants:
1558 if variant:
1559 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1560 " passed)" % (os_obj.name, variant),
1561 errors.ECODE_INVAL)
1562 return
1563 if not variant:
1564 raise errors.OpPrereqError("OS name must include a variant",
1567 if variant not in os_obj.supported_variants:
1568 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1571 def _GetNodeInstancesInner(cfg, fn):
1572 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1575 def _GetNodeInstances(cfg, node_name):
1576 """Returns a list of all primary and secondary instances on a node.
1580 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1583 def _GetNodePrimaryInstances(cfg, node_name):
1584 """Returns primary instances on a node.
1587 return _GetNodeInstancesInner(cfg,
1588 lambda inst: node_name == inst.primary_node)
1591 def _GetNodeSecondaryInstances(cfg, node_name):
1592 """Returns secondary instances on a node.
1595 return _GetNodeInstancesInner(cfg,
1596 lambda inst: node_name in inst.secondary_nodes)
1599 def _GetStorageTypeArgs(cfg, storage_type):
1600 """Returns the arguments for a storage type.
1603 # Special case for file storage
1604 if storage_type == constants.ST_FILE:
1605 # storage.FileStorage wants a list of storage directories
1606 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1611 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1614 for dev in instance.disks:
1615 cfg.SetDiskID(dev, node_name)
1617 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1619 result.Raise("Failed to get disk status from node %s" % node_name,
1620 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1622 for idx, bdev_status in enumerate(result.payload):
1623 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1629 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1630 """Check the sanity of iallocator and node arguments and use the
1631 cluster-wide iallocator if appropriate.
1633 Check that at most one of (iallocator, node) is specified. If none is
1634 specified, then the LU's opcode's iallocator slot is filled with the
1635 cluster-wide default iallocator.
1637 @type iallocator_slot: string
1638 @param iallocator_slot: the name of the opcode iallocator slot
1639 @type node_slot: string
1640 @param node_slot: the name of the opcode target node slot
1643 node = getattr(lu.op, node_slot, None)
1644 iallocator = getattr(lu.op, iallocator_slot, None)
1646 if node is not None and iallocator is not None:
1647 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1649 elif node is None and iallocator is None:
1650 default_iallocator = lu.cfg.GetDefaultIAllocator()
1651 if default_iallocator:
1652 setattr(lu.op, iallocator_slot, default_iallocator)
1653 else:
1654 raise errors.OpPrereqError("No iallocator or node given and no"
1655 " cluster-wide default iallocator found;"
1656 " please specify either an iallocator or a"
1657 " node, or set a cluster-wide default"
1661 def _GetDefaultIAllocator(cfg, iallocator):
1662 """Decides on which iallocator to use.
1664 @type cfg: L{config.ConfigWriter}
1665 @param cfg: Cluster configuration object
1666 @type iallocator: string or None
1667 @param iallocator: Iallocator specified in opcode
1669 @return: Iallocator name
1672 if not iallocator:
1673 # Use default iallocator
1674 iallocator = cfg.GetDefaultIAllocator()
1676 if not iallocator:
1677 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1678 " opcode nor as a cluster-wide default",
1684 class LUClusterPostInit(LogicalUnit):
1685 """Logical unit for running hooks after cluster initialization.
1688 HPATH = "cluster-init"
1689 HTYPE = constants.HTYPE_CLUSTER
1691 def BuildHooksEnv(self):
1696 "OP_TARGET": self.cfg.GetClusterName(),
1699 def BuildHooksNodes(self):
1700 """Build hooks nodes.
1703 return ([], [self.cfg.GetMasterNode()])
1705 def Exec(self, feedback_fn):
1712 class LUClusterDestroy(LogicalUnit):
1713 """Logical unit for destroying the cluster.
1716 HPATH = "cluster-destroy"
1717 HTYPE = constants.HTYPE_CLUSTER
1719 def BuildHooksEnv(self):
1724 "OP_TARGET": self.cfg.GetClusterName(),
1727 def BuildHooksNodes(self):
1728 """Build hooks nodes.
1733 def CheckPrereq(self):
1734 """Check prerequisites.
1736 This checks whether the cluster is empty.
1738 Any errors are signaled by raising errors.OpPrereqError.
1741 master = self.cfg.GetMasterNode()
1743 nodelist = self.cfg.GetNodeList()
1744 if len(nodelist) != 1 or nodelist[0] != master:
1745 raise errors.OpPrereqError("There are still %d node(s) in"
1746 " this cluster." % (len(nodelist) - 1),
1748 instancelist = self.cfg.GetInstanceList()
1749 if instancelist:
1750 raise errors.OpPrereqError("There are still %d instance(s) in"
1751 " this cluster." % len(instancelist),
1754 def Exec(self, feedback_fn):
1755 """Destroys the cluster.
1758 master_params = self.cfg.GetMasterNetworkParameters()
1760 # Run post hooks on master node before it's removed
1761 _RunPostHook(self, master_params.name)
1763 ems = self.cfg.GetUseExternalMipScript()
1764 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1767 self.LogWarning("Error disabling the master IP address: %s",
1770 return master_params.name
1773 def _VerifyCertificate(filename):
1774 """Verifies a certificate for L{LUClusterVerifyConfig}.
1776 @type filename: string
1777 @param filename: Path to PEM file
1780 try:
1781 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1782 utils.ReadFile(filename))
1783 except Exception, err: # pylint: disable=W0703
1784 return (LUClusterVerifyConfig.ETYPE_ERROR,
1785 "Failed to load X509 certificate %s: %s" % (filename, err))
1787 (errcode, msg) = \
1788 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1789 constants.SSL_CERT_EXPIRATION_ERROR)
1791 if msg:
1792 fnamemsg = "While verifying %s: %s" % (filename, msg)
1793 else:
1794 fnamemsg = None
1796 if errcode is None:
1797 return (None, fnamemsg)
1798 elif errcode == utils.CERT_WARNING:
1799 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1800 elif errcode == utils.CERT_ERROR:
1801 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1803 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1806 def _GetAllHypervisorParameters(cluster, instances):
1807 """Compute the set of all hypervisor parameters.
1809 @type cluster: L{objects.Cluster}
1810 @param cluster: the cluster object
1811 @param instances: list of L{objects.Instance}
1812 @param instances: additional instances from which to obtain parameters
1813 @rtype: list of (origin, hypervisor, parameters)
1814 @return: a list with all parameters found, indicating the hypervisor they
1815 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1819 hvp_data = []
1820 for hv_name in cluster.enabled_hypervisors:
1821 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1823 for os_name, os_hvp in cluster.os_hvp.items():
1824 for hv_name, hv_params in os_hvp.items():
1825 if hv_params:
1826 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1827 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1829 # TODO: collapse identical parameter values in a single one
1830 for instance in instances:
1831 if instance.hvparams:
1832 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1833 cluster.FillHV(instance)))
1835 return hvp_data
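# Illustrative example (hypothetical values): the returned list carries one
# (origin, hypervisor, parameters) tuple per source, e.g.
#
#   [("cluster", "xen-pvm", {"kernel_path": "/boot/vmlinuz"}),
#    ("os debian-wheezy", "xen-pvm", {"kernel_path": "/boot/vmlinuz"}),
#    ("instance inst1.example.com", "xen-pvm", {"kernel_path": "/boot/vmlinuz"})]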
1838 class _VerifyErrors(object):
1839 """Mix-in for cluster/group verify LUs.
1841 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1842 self.op and self._feedback_fn to be available.)
1846 ETYPE_FIELD = "code"
1847 ETYPE_ERROR = "ERROR"
1848 ETYPE_WARNING = "WARNING"
1850 def _Error(self, ecode, item, msg, *args, **kwargs):
1851 """Format an error message.
1853 Based on the opcode's error_codes parameter, either format a
1854 parseable error code, or a simpler error string.
1856 This must be called only from Exec and functions called from Exec.
1859 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1860 itype, etxt, _ = ecode
1861 # first complete the msg
1862 if args:
1863 msg = msg % args
1864 # then format the whole message
1865 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1866 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1872 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1873 # and finally report it via the feedback_fn
1874 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1876 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1877 """Log an error message if the passed condition is True.
1881 or self.op.debug_simulate_errors) # pylint: disable=E1101
1883 # If the error code is in the list of ignored errors, demote the error to a
1885 (_, etxt, _) = ecode
1886 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1887 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1889 if cond:
1890 self._Error(ecode, *args, **kwargs)
1892 # do not mark the operation as failed for WARN cases only
1893 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1894 self.bad = self.bad or cond
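# Illustrative example (hypothetical condition): how a verify LU uses this
# mix-in; the error is demoted to a warning if its code appears in
# self.op.ignore_errors.
#
#   self._ErrorIf(bool(msg), constants.CV_ECLUSTERCFG, None,
#                 "cluster configuration check failed: %s", msg)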
1897 class LUClusterVerify(NoHooksLU):
1898 """Submits all jobs necessary to verify the cluster.
1903 def ExpandNames(self):
1904 self.needed_locks = {}
1906 def Exec(self, feedback_fn):
1907 jobs = []
1909 if self.op.group_name:
1910 groups = [self.op.group_name]
1911 depends_fn = lambda: None
1912 else:
1913 groups = self.cfg.GetNodeGroupList()
1915 # Verify global configuration
1916 jobs.append([
1917 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1918 ])
1920 # Always depend on global verification
1921 depends_fn = lambda: [(-len(jobs), [])]
1923 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1924 ignore_errors=self.op.ignore_errors,
1925 depends=depends_fn())]
1926 for group in groups)
1928 # Fix up all parameters
1929 for op in itertools.chain(*jobs): # pylint: disable=W0142
1930 op.debug_simulate_errors = self.op.debug_simulate_errors
1931 op.verbose = self.op.verbose
1932 op.error_codes = self.op.error_codes
1933 try:
1934 op.skip_checks = self.op.skip_checks
1935 except AttributeError:
1936 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1938 return ResultWithJobs(jobs)
1941 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1942 """Verifies the cluster config.
1947 def _VerifyHVP(self, hvp_data):
1948 """Verifies locally the syntax of the hypervisor parameters.
1951 for item, hv_name, hv_params in hvp_data:
1952 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1955 hv_class = hypervisor.GetHypervisor(hv_name)
1956 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1957 hv_class.CheckParameterSyntax(hv_params)
1958 except errors.GenericError, err:
1959 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1961 def ExpandNames(self):
1962 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1963 self.share_locks = _ShareAll()
1965 def CheckPrereq(self):
1966 """Check prerequisites.
1969 # Retrieve all information
1970 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1971 self.all_node_info = self.cfg.GetAllNodesInfo()
1972 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1974 def Exec(self, feedback_fn):
1975 """Verify integrity of cluster, performing various test on nodes.
1979 self._feedback_fn = feedback_fn
1981 feedback_fn("* Verifying cluster config")
1983 for msg in self.cfg.VerifyConfig():
1984 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1986 feedback_fn("* Verifying cluster certificate files")
1988 for cert_filename in constants.ALL_CERT_FILES:
1989 (errcode, msg) = _VerifyCertificate(cert_filename)
1990 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1992 feedback_fn("* Verifying hypervisor parameters")
1994 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1995 self.all_inst_info.values()))
1997 feedback_fn("* Verifying all nodes belong to an existing group")
1999 # We do this verification here because, should this bogus circumstance
2000 # occur, it would never be caught by VerifyGroup, which only acts on
2001 # nodes/instances reachable from existing node groups.
2003 dangling_nodes = set(node.name for node in self.all_node_info.values()
2004 if node.group not in self.all_group_info)
2006 dangling_instances = {}
2007 no_node_instances = []
2009 for inst in self.all_inst_info.values():
2010 if inst.primary_node in dangling_nodes:
2011 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2012 elif inst.primary_node not in self.all_node_info:
2013 no_node_instances.append(inst.name)
2018 utils.CommaJoin(dangling_instances.get(node.name,
2020 for node in dangling_nodes]
2022 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2024 "the following nodes (and their instances) belong to a non"
2025 " existing group: %s", utils.CommaJoin(pretty_dangling))
2027 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2029 "the following instances have a non-existing primary-node:"
2030 " %s", utils.CommaJoin(no_node_instances))
2035 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2036 """Verifies the status of a node group.
2039 HPATH = "cluster-verify"
2040 HTYPE = constants.HTYPE_CLUSTER
2043 _HOOKS_INDENT_RE = re.compile("^", re.M)
2045 class NodeImage(object):
2046 """A class representing the logical and physical status of a node.
2049 @ivar name: the node name to which this object refers
2050 @ivar volumes: a structure as returned from
2051 L{ganeti.backend.GetVolumeList} (runtime)
2052 @ivar instances: a list of running instances (runtime)
2053 @ivar pinst: list of configured primary instances (config)
2054 @ivar sinst: list of configured secondary instances (config)
2055 @ivar sbp: dictionary of {primary-node: list of instances} for all
2056 instances for which this node is secondary (config)
2057 @ivar mfree: free memory, as reported by hypervisor (runtime)
2058 @ivar dfree: free disk, as reported by the node (runtime)
2059 @ivar offline: the offline status (config)
2060 @type rpc_fail: boolean
2061 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2062 not whether the individual keys were correct) (runtime)
2063 @type lvm_fail: boolean
2064 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2065 @type hyp_fail: boolean
2066 @ivar hyp_fail: whether the RPC call didn't return the instance list
2067 @type ghost: boolean
2068 @ivar ghost: whether this is a known node or not (config)
2069 @type os_fail: boolean
2070 @ivar os_fail: whether the RPC call didn't return valid OS data
2072 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2073 @type vm_capable: boolean
2074 @ivar vm_capable: whether the node can host instances
2077 def __init__(self, offline=False, name=None, vm_capable=True):
2086 self.offline = offline
2087 self.vm_capable = vm_capable
2088 self.rpc_fail = False
2089 self.lvm_fail = False
2090 self.hyp_fail = False
2092 self.os_fail = False
2095 def ExpandNames(self):
2096 # This raises errors.OpPrereqError on its own:
2097 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2099 # Get instances in node group; this is unsafe and needs verification later
2101 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2103 self.needed_locks = {
2104 locking.LEVEL_INSTANCE: inst_names,
2105 locking.LEVEL_NODEGROUP: [self.group_uuid],
2106 locking.LEVEL_NODE: [],
2109 self.share_locks = _ShareAll()
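# LEVEL_NODE is deliberately left empty here; DeclareLocks below fills it in
# once the group's member nodes (and any extra secondaries of mirrored
# instances) are known.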
2111 def DeclareLocks(self, level):
2112 if level == locking.LEVEL_NODE:
2113 # Get members of node group; this is unsafe and needs verification later
2114 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2116 all_inst_info = self.cfg.GetAllInstancesInfo()
2118 # In Exec(), we warn about mirrored instances that have primary and
2119 # secondary living in separate node groups. To fully verify that
2120 # volumes for these instances are healthy, we will need to do an
2121 # extra call to their secondaries. We ensure here those nodes will
2123 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2124 # Important: access only the instances whose lock is owned
2125 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2126 nodes.update(all_inst_info[inst].secondary_nodes)
2128 self.needed_locks[locking.LEVEL_NODE] = nodes
2130 def CheckPrereq(self):
2131 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2132 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2134 group_nodes = set(self.group_info.members)
2136 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2139 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2141 unlocked_instances = \
2142 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2145 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2146 utils.CommaJoin(unlocked_nodes),
2149 if unlocked_instances:
2150 raise errors.OpPrereqError("Missing lock for instances: %s" %
2151 utils.CommaJoin(unlocked_instances),
2154 self.all_node_info = self.cfg.GetAllNodesInfo()
2155 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2157 self.my_node_names = utils.NiceSort(group_nodes)
2158 self.my_inst_names = utils.NiceSort(group_instances)
2160 self.my_node_info = dict((name, self.all_node_info[name])
2161 for name in self.my_node_names)
2163 self.my_inst_info = dict((name, self.all_inst_info[name])
2164 for name in self.my_inst_names)
2166 # We detect here the nodes that will need the extra RPC calls for verifying
2167 # split LV volumes; they should be locked.
2168 extra_lv_nodes = set()
2170 for inst in self.my_inst_info.values():
2171 if inst.disk_template in constants.DTS_INT_MIRROR:
2172 for nname in inst.all_nodes:
2173 if self.all_node_info[nname].group != self.group_uuid:
2174 extra_lv_nodes.add(nname)
2176 unlocked_lv_nodes = \
2177 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2179 if unlocked_lv_nodes:
2180 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2181 utils.CommaJoin(unlocked_lv_nodes),
2183 self.extra_lv_nodes = list(extra_lv_nodes)
2185 def _VerifyNode(self, ninfo, nresult):
2186 """Perform some basic validation on data returned from a node.
2188 - check the result data structure is well formed and has all the
2190 - check ganeti version
2192 @type ninfo: L{objects.Node}
2193 @param ninfo: the node to check
2194 @param nresult: the results from the node
2196 @return: whether overall this call was successful (and we can expect
2197 reasonable values in the response)
2201 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2203 # main result, nresult should be a non-empty dict
2204 test = not nresult or not isinstance(nresult, dict)
2205 _ErrorIf(test, constants.CV_ENODERPC, node,
2206 "unable to verify node: no data returned")
2210 # compares ganeti version
2211 local_version = constants.PROTOCOL_VERSION
2212 remote_version = nresult.get("version", None)
2213 test = not (remote_version and
2214 isinstance(remote_version, (list, tuple)) and
2215 len(remote_version) == 2)
2216 _ErrorIf(test, constants.CV_ENODERPC, node,
2217 "connection to node returned invalid data")
2221 test = local_version != remote_version[0]
2222 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2223 "incompatible protocol versions: master %s,"
2224 " node %s", local_version, remote_version[0])
2228 # node seems compatible, we can actually try to look into its results
2230 # full package version
2231 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2232 constants.CV_ENODEVERSION, node,
2233 "software version mismatch: master %s, node %s",
2234 constants.RELEASE_VERSION, remote_version[1],
2235 code=self.ETYPE_WARNING)
2237 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2238 if ninfo.vm_capable and isinstance(hyp_result, dict):
2239 for hv_name, hv_result in hyp_result.iteritems():
2240 test = hv_result is not None
2241 _ErrorIf(test, constants.CV_ENODEHV, node,
2242 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2244 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2245 if ninfo.vm_capable and isinstance(hvp_result, list):
2246 for item, hv_name, hv_result in hvp_result:
2247 _ErrorIf(True, constants.CV_ENODEHV, node,
2248 "hypervisor %s parameter verify failure (source %s): %s",
2249 hv_name, item, hv_result)
2251 test = nresult.get(constants.NV_NODESETUP,
2252 ["Missing NODESETUP results"])
2253 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2258 def _VerifyNodeTime(self, ninfo, nresult,
2259 nvinfo_starttime, nvinfo_endtime):
2260 """Check the node time.
2262 @type ninfo: L{objects.Node}
2263 @param ninfo: the node to check
2264 @param nresult: the remote results for the node
2265 @param nvinfo_starttime: the start time of the RPC call
2266 @param nvinfo_endtime: the end time of the RPC call
2270 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2272 ntime = nresult.get(constants.NV_TIME, None)
2274 ntime_merged = utils.MergeTime(ntime)
2275 except (ValueError, TypeError):
2276 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2279 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2280 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2281 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2282 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2286 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2287 "Node time diverges by at least %s from master node time",
2290 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2291 """Check the node LVM results.
2293 @type ninfo: L{objects.Node}
2294 @param ninfo: the node to check
2295 @param nresult: the remote results for the node
2296 @param vg_name: the configured VG name
2303 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2305 # checks vg existence and size > 20G
2306 vglist = nresult.get(constants.NV_VGLIST, None)
2308 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2310 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2311 constants.MIN_VG_SIZE)
2312 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2315 pvlist = nresult.get(constants.NV_PVLIST, None)
2316 test = pvlist is None
2317 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2319 # check that ':' is not present in PV names, since it's a
2320 # special character for lvcreate (denotes the range of PEs to
2322 for _, pvname, owner_vg in pvlist:
2323 test = ":" in pvname
2324 _ErrorIf(test, constants.CV_ENODELVM, node,
2325 "Invalid character ':' in PV '%s' of VG '%s'",
2328 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2329 """Check the node bridges.
2331 @type ninfo: L{objects.Node}
2332 @param ninfo: the node to check
2333 @param nresult: the remote results for the node
2334 @param bridges: the expected list of bridges
2341 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2343 missing = nresult.get(constants.NV_BRIDGES, None)
2344 test = not isinstance(missing, list)
2345 _ErrorIf(test, constants.CV_ENODENET, node,
2346 "did not return valid bridge information")
2348 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2349 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2351 def _VerifyNodeUserScripts(self, ninfo, nresult):
2352 """Check the results of user scripts presence and executability on the node
2354 @type ninfo: L{objects.Node}
2355 @param ninfo: the node to check
2356 @param nresult: the remote results for the node
2361 test = constants.NV_USERSCRIPTS not in nresult
2362 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2363 "did not return user scripts information")
2365 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2367 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2368 "user scripts not present or not executable: %s" %
2369 utils.CommaJoin(sorted(broken_scripts)))
2371 def _VerifyNodeNetwork(self, ninfo, nresult):
2372 """Check the node network connectivity results.
2374 @type ninfo: L{objects.Node}
2375 @param ninfo: the node to check
2376 @param nresult: the remote results for the node
2380 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2382 test = constants.NV_NODELIST not in nresult
2383 _ErrorIf(test, constants.CV_ENODESSH, node,
2384 "node hasn't returned node ssh connectivity data")
2386 if nresult[constants.NV_NODELIST]:
2387 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2388 _ErrorIf(True, constants.CV_ENODESSH, node,
2389 "ssh communication with node '%s': %s", a_node, a_msg)
2391 test = constants.NV_NODENETTEST not in nresult
2392 _ErrorIf(test, constants.CV_ENODENET, node,
2393 "node hasn't returned node tcp connectivity data")
2395 if nresult[constants.NV_NODENETTEST]:
2396 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2398 _ErrorIf(True, constants.CV_ENODENET, node,
2399 "tcp communication with node '%s': %s",
2400 anode, nresult[constants.NV_NODENETTEST][anode])
2402 test = constants.NV_MASTERIP not in nresult
2403 _ErrorIf(test, constants.CV_ENODENET, node,
2404 "node hasn't returned node master IP reachability data")
2406 if not nresult[constants.NV_MASTERIP]:
2407 if node == self.master_node:
2408 msg = "the master node cannot reach the master IP (not configured?)"
2410 msg = "cannot reach the master IP"
2411 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2413 def _VerifyInstance(self, instance, instanceconfig, node_image,
2415 """Verify an instance.
2417 This function checks whether the required block devices are
2418 available on the instance's node.
2421 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2422 node_current = instanceconfig.primary_node
2424 node_vol_should = {}
2425 instanceconfig.MapLVsByNode(node_vol_should)
2427 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2428 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2429 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2431 for node in node_vol_should:
2432 n_img = node_image[node]
2433 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2434 # ignore missing volumes on offline or broken nodes
2436 for volume in node_vol_should[node]:
2437 test = volume not in n_img.volumes
2438 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2439 "volume %s missing on node %s", volume, node)
2441 if instanceconfig.admin_state == constants.ADMINST_UP:
2442 pri_img = node_image[node_current]
2443 test = instance not in pri_img.instances and not pri_img.offline
2444 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2445 "instance not running on its primary node %s",
2448 diskdata = [(nname, success, status, idx)
2449 for (nname, disks) in diskstatus.items()
2450 for idx, (success, status) in enumerate(disks)]
2452 for nname, success, bdev_status, idx in diskdata:
2453 # the 'ghost node' construction in Exec() ensures that we have a
2455 snode = node_image[nname]
2456 bad_snode = snode.ghost or snode.offline
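# A failed status fetch is not reported as a faulty disk when the node is a
# ghost or offline node, since nothing useful can be queried there anyway.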
2457 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2458 not success and not bad_snode,
2459 constants.CV_EINSTANCEFAULTYDISK, instance,
2460 "couldn't retrieve status for disk/%s on %s: %s",
2461 idx, nname, bdev_status)
2462 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2463 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2464 constants.CV_EINSTANCEFAULTYDISK, instance,
2465 "disk/%s on %s is faulty", idx, nname)
2467 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2468 """Verify if there are any unknown volumes in the cluster.
2470 The .os, .swap and backup volumes are ignored. All other volumes are
2471 reported as unknown.
2473 @type reserved: L{ganeti.utils.FieldSet}
2474 @param reserved: a FieldSet of reserved volume names
2477 for node, n_img in node_image.items():
2478 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2479 self.all_node_info[node].group != self.group_uuid):
2480 # skip non-healthy nodes
2482 for volume in n_img.volumes:
2483 test = ((node not in node_vol_should or
2484 volume not in node_vol_should[node]) and
2485 not reserved.Matches(volume))
2486 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2487 "volume %s is unknown", volume)
2489 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2490 """Verify N+1 Memory Resilience.
2492 Check that if one single node dies we can still start all the
2493 instances it was primary for.
2496 cluster_info = self.cfg.GetClusterInfo()
2497 for node, n_img in node_image.items():
2498 # This code checks that every node which is now listed as
2499 # secondary has enough memory to host all instances it is
2500 # supposed to, should a single other node in the cluster fail.
2501 # FIXME: not ready for failover to an arbitrary node
2502 # FIXME: does not support file-backed instances
2503 # WARNING: we currently take into account down instances as well
2504 # as up ones, considering that even if they're down someone
2505 # might want to start them even in the event of a node failure.
2506 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2507 # we're skipping nodes marked offline and nodes in other groups from
2508 # the N+1 warning, since most likely we don't have good memory
2509 # information from them; we already list instances living on such
2510 # nodes, and that's enough warning
2512 #TODO(dynmem): also consider ballooning out other instances
2513 for prinode, instances in n_img.sbp.items():
2515 for instance in instances:
2516 bep = cluster_info.FillBE(instance_cfg[instance])
2517 if bep[constants.BE_AUTO_BALANCE]:
2518 needed_mem += bep[constants.BE_MINMEM]
2519 test = n_img.mfree < needed_mem
2520 self._ErrorIf(test, constants.CV_ENODEN1, node,
2521 "not enough memory to accomodate instance failovers"
2522 " should node %s fail (%dMiB needed, %dMiB available)",
2523 prinode, needed_mem, n_img.mfree)
2526 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2527 (files_all, files_opt, files_mc, files_vm)):
2528 """Verifies file checksums collected from all nodes.
2530 @param errorif: Callback for reporting errors
2531 @param nodeinfo: List of L{objects.Node} objects
2532 @param master_node: Name of master node
2533 @param all_nvinfo: RPC results
2536 # Define functions determining which nodes to consider for a file
2539 (files_mc, lambda node: (node.master_candidate or
2540 node.name == master_node)),
2541 (files_vm, lambda node: node.vm_capable),
2544 # Build mapping from filename to list of nodes which should have the file
2546 for (files, fn) in files2nodefn:
2548 filenodes = nodeinfo
2550 filenodes = filter(fn, nodeinfo)
2551 nodefiles.update((filename,
2552 frozenset(map(operator.attrgetter("name"), filenodes)))
2553 for filename in files)
2555 assert set(nodefiles) == (files_all | files_mc | files_vm)
2557 fileinfo = dict((filename, {}) for filename in nodefiles)
2558 ignore_nodes = set()
2560 for node in nodeinfo:
2562 ignore_nodes.add(node.name)
2565 nresult = all_nvinfo[node.name]
2567 if nresult.fail_msg or not nresult.payload:
2570 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2572 test = not (node_files and isinstance(node_files, dict))
2573 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2574 "Node did not return file checksum data")
2576 ignore_nodes.add(node.name)
2579 # Build per-checksum mapping from filename to nodes having it
2580 for (filename, checksum) in node_files.items():
2581 assert filename in nodefiles
2582 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2584 for (filename, checksums) in fileinfo.items():
2585 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2587 # Nodes having the file
2588 with_file = frozenset(node_name
2589 for nodes in fileinfo[filename].values()
2590 for node_name in nodes) - ignore_nodes
2592 expected_nodes = nodefiles[filename] - ignore_nodes
2594 # Nodes missing file
2595 missing_file = expected_nodes - with_file
2597 if filename in files_opt:
2599 errorif(missing_file and missing_file != expected_nodes,
2600 constants.CV_ECLUSTERFILECHECK, None,
2601 "File %s is optional, but it must exist on all or no"
2602 " nodes (not found on %s)",
2603 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2605 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2606 "File %s is missing from node(s) %s", filename,
2607 utils.CommaJoin(utils.NiceSort(missing_file)))
2609 # Warn if a node has a file it shouldn't
2610 unexpected = with_file - expected_nodes
2612 constants.CV_ECLUSTERFILECHECK, None,
2613 "File %s should not exist on node(s) %s",
2614 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2616 # See if there are multiple versions of the file
2617 test = len(checksums) > 1
2619 variants = ["variant %s on %s" %
2620 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2621 for (idx, (checksum, nodes)) in
2622 enumerate(sorted(checksums.items()))]
2626 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2627 "File %s found with %s different checksums (%s)",
2628 filename, len(checksums), "; ".join(variants))
2630 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2632 """Verifies and the node DRBD status.
2634 @type ninfo: L{objects.Node}
2635 @param ninfo: the node to check
2636 @param nresult: the remote results for the node
2637 @param instanceinfo: the dict of instances
2638 @param drbd_helper: the configured DRBD usermode helper
2639 @param drbd_map: the DRBD map as returned by
2640 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2644 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2647 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2648 test = (helper_result is None)
2649 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2650 "no drbd usermode helper returned")
2652 status, payload = helper_result
2654 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2655 "drbd usermode helper check unsuccessful: %s", payload)
2656 test = status and (payload != drbd_helper)
2657 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2658 "wrong drbd usermode helper: %s", payload)
2660 # compute the DRBD minors
2662 for minor, instance in drbd_map[node].items():
2663 test = instance not in instanceinfo
2664 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2665 "ghost instance '%s' in temporary DRBD map", instance)
2666 # ghost instance should not be running, but otherwise we
2667 # don't give double warnings (both ghost instance and
2668 # unallocated minor in use)
2670 node_drbd[minor] = (instance, False)
2672 instance = instanceinfo[instance]
2673 node_drbd[minor] = (instance.name,
2674 instance.admin_state == constants.ADMINST_UP)
2676 # and now check them
2677 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2678 test = not isinstance(used_minors, (tuple, list))
2679 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2680 "cannot parse drbd status file: %s", str(used_minors))
2682 # we cannot check drbd status
2685 for minor, (iname, must_exist) in node_drbd.items():
2686 test = minor not in used_minors and must_exist
2687 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2688 "drbd minor %d of instance %s is not active", minor, iname)
2689 for minor in used_minors:
2690 test = minor not in node_drbd
2691 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2692 "unallocated drbd minor %d is in use", minor)
2694 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2695 """Builds the node OS structures.
2697 @type ninfo: L{objects.Node}
2698 @param ninfo: the node to check
2699 @param nresult: the remote results for the node
2700 @param nimg: the node image object
2704 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2706 remote_os = nresult.get(constants.NV_OSLIST, None)
2707 test = (not isinstance(remote_os, list) or
2708 not compat.all(isinstance(v, list) and len(v) == 7
2709 for v in remote_os))
2711 _ErrorIf(test, constants.CV_ENODEOS, node,
2712 "node hasn't returned valid OS data")
2721 for (name, os_path, status, diagnose,
2722 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2724 if name not in os_dict:
2727 # parameters is a list of lists instead of list of tuples due to
2728 # JSON lacking a real tuple type, fix it:
2729 parameters = [tuple(v) for v in parameters]
2730 os_dict[name].append((os_path, status, diagnose,
2731 set(variants), set(parameters), set(api_ver)))
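# os_dict maps each OS name to a list of
# (path, status, diagnose, variants, parameters, api_versions) tuples; only
# the first entry per name counts, duplicates are flagged in _VerifyNodeOS.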
2733 nimg.oslist = os_dict
2735 def _VerifyNodeOS(self, ninfo, nimg, base):
2736 """Verifies the node OS list.
2738 @type ninfo: L{objects.Node}
2739 @param ninfo: the node to check
2740 @param nimg: the node image object
2741 @param base: the 'template' node we match against (e.g. from the master)
2745 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2747 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2749 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2750 for os_name, os_data in nimg.oslist.items():
2751 assert os_data, "Empty OS status for OS %s?!" % os_name
2752 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2753 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2754 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2755 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2756 "OS '%s' has multiple entries (first one shadows the rest): %s",
2757 os_name, utils.CommaJoin([v[0] for v in os_data]))
2758 # comparisons with the 'base' image
2759 test = os_name not in base.oslist
2760 _ErrorIf(test, constants.CV_ENODEOS, node,
2761 "Extra OS %s not present on reference node (%s)",
2765 assert base.oslist[os_name], "Base node has empty OS status?"
2766 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2768 # base OS is invalid, skipping
2770 for kind, a, b in [("API version", f_api, b_api),
2771 ("variants list", f_var, b_var),
2772 ("parameters", beautify_params(f_param),
2773 beautify_params(b_param))]:
2774 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2775 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2776 kind, os_name, base.name,
2777 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2779 # check any missing OSes
2780 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2781 _ErrorIf(missing, constants.CV_ENODEOS, node,
2782 "OSes present on reference node %s but missing on this node: %s",
2783 base.name, utils.CommaJoin(missing))
2785 def _VerifyOob(self, ninfo, nresult):
2786 """Verifies out of band functionality of a node.
2788 @type ninfo: L{objects.Node}
2789 @param ninfo: the node to check
2790 @param nresult: the remote results for the node
2794 # We just have to verify the paths on master and/or master candidates
2795 # as the oob helper is invoked on the master
2796 if ((ninfo.master_candidate or ninfo.master_capable) and
2797 constants.NV_OOB_PATHS in nresult):
2798 for path_result in nresult[constants.NV_OOB_PATHS]:
2799 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2801 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2802 """Verifies and updates the node volume data.
2804 This function will update a L{NodeImage}'s internal structures
2805 with data from the remote call.
2807 @type ninfo: L{objects.Node}
2808 @param ninfo: the node to check
2809 @param nresult: the remote results for the node
2810 @param nimg: the node image object
2811 @param vg_name: the configured VG name
2815 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2817 nimg.lvm_fail = True
2818 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2821 elif isinstance(lvdata, basestring):
2822 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2823 utils.SafeEncode(lvdata))
2824 elif not isinstance(lvdata, dict):
2825 _ErrorIf(True, constants.CV_ENODELVM, node,
2826 "rpc call to node failed (lvlist)")
2828 nimg.volumes = lvdata
2829 nimg.lvm_fail = False
2831 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2832 """Verifies and updates the node instance list.
2834 If the listing was successful, then updates this node's instance
2835 list. Otherwise, it marks the RPC call as failed for the instance
2838 @type ninfo: L{objects.Node}
2839 @param ninfo: the node to check
2840 @param nresult: the remote results for the node
2841 @param nimg: the node image object
2844 idata = nresult.get(constants.NV_INSTANCELIST, None)
2845 test = not isinstance(idata, list)
2846 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2847 "rpc call to node failed (instancelist): %s",
2848 utils.SafeEncode(str(idata)))
2850 nimg.hyp_fail = True
2852 nimg.instances = idata
2854 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2855 """Verifies and computes a node information map
2857 @type ninfo: L{objects.Node}
2858 @param ninfo: the node to check
2859 @param nresult: the remote results for the node
2860 @param nimg: the node image object
2861 @param vg_name: the configured VG name
2865 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2867 # try to read free memory (from the hypervisor)
2868 hv_info = nresult.get(constants.NV_HVINFO, None)
2869 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2870 _ErrorIf(test, constants.CV_ENODEHV, node,
2871 "rpc call to node failed (hvinfo)")
2874 nimg.mfree = int(hv_info["memory_free"])
2875 except (ValueError, TypeError):
2876 _ErrorIf(True, constants.CV_ENODERPC, node,
2877 "node returned invalid nodeinfo, check hypervisor")
2879 # FIXME: devise a free space model for file based instances as well
2880 if vg_name is not None:
2881 test = (constants.NV_VGLIST not in nresult or
2882 vg_name not in nresult[constants.NV_VGLIST])
2883 _ErrorIf(test, constants.CV_ENODELVM, node,
2884 "node didn't return data for the volume group '%s'"
2885 " - it is either missing or broken", vg_name)
2888 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2889 except (ValueError, TypeError):
2890 _ErrorIf(True, constants.CV_ENODERPC, node,
2891 "node returned invalid LVM info, check LVM status")
2893 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2894 """Gets per-disk status information for all instances.
2896 @type nodelist: list of strings
2897 @param nodelist: Node names
2898 @type node_image: dict of (name, L{NodeImage})
2899 @param node_image: Node images
2900 @type instanceinfo: dict of (name, L{objects.Instance})
2901 @param instanceinfo: Instance objects
2902 @rtype: {instance: {node: [(success, payload)]}}
2903 @return: a dictionary of per-instance dictionaries with nodes as
2904 keys and disk information as values; the disk information is a
2905 list of tuples (success, payload)
2908 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2911 node_disks_devonly = {}
2912 diskless_instances = set()
2913 diskless = constants.DT_DISKLESS
2915 for nname in nodelist:
2916 node_instances = list(itertools.chain(node_image[nname].pinst,
2917 node_image[nname].sinst))
2918 diskless_instances.update(inst for inst in node_instances
2919 if instanceinfo[inst].disk_template == diskless)
2920 disks = [(inst, disk)
2921 for inst in node_instances
2922 for disk in instanceinfo[inst].disks]
2925 # No need to collect data
2928 node_disks[nname] = disks
2930 # _AnnotateDiskParams makes already copies of the disks
2932 for (inst, dev) in disks:
2933 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2934 self.cfg.SetDiskID(anno_disk, nname)
2935 devonly.append(anno_disk)
2937 node_disks_devonly[nname] = devonly
2939 assert len(node_disks) == len(node_disks_devonly)
2941 # Collect data from all nodes with disks
2942 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2945 assert len(result) == len(node_disks)
2949 for (nname, nres) in result.items():
2950 disks = node_disks[nname]
2953 # No data from this node
2954 data = len(disks) * [(False, "node offline")]
2957 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2958 "while getting disk information: %s", msg)
2960 # No data from this node
2961 data = len(disks) * [(False, msg)]
2964 for idx, i in enumerate(nres.payload):
2965 if isinstance(i, (tuple, list)) and len(i) == 2:
2968 logging.warning("Invalid result from node %s, entry %d: %s",
2970 data.append((False, "Invalid result from the remote node"))
2972 for ((inst, _), status) in zip(disks, data):
2973 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2975 # Add empty entries for diskless instances.
2976 for inst in diskless_instances:
2977 assert inst not in instdisk
2980 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2981 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2982 compat.all(isinstance(s, (tuple, list)) and
2983 len(s) == 2 for s in statuses)
2984 for inst, nnames in instdisk.items()
2985 for nname, statuses in nnames.items())
2986 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2991 def _SshNodeSelector(group_uuid, all_nodes):
2992 """Create endless iterators for all potential SSH check hosts.
2995 nodes = [node for node in all_nodes
2996 if (node.group != group_uuid and
2998 keyfunc = operator.attrgetter("group")
3000 return map(itertools.cycle,
3001 [sorted(map(operator.attrgetter("name"), names))
3002 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3006 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3007 """Choose which nodes should talk to which other nodes.
3009 We will make nodes contact all nodes in their group, and one node from
3012 @warning: This algorithm has a known issue if one node group is much
3013 smaller than others (e.g. just one node). In such a case all other
3014 nodes will talk to the single node.
3017 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3018 sel = cls._SshNodeSelector(group_uuid, all_nodes)
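# Pair every online node of this group with one node from each other group,
# taken round-robin from the per-group cycles built by _SshNodeSelector.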
3020 return (online_nodes,
3021 dict((name, sorted([i.next() for i in sel]))
3022 for name in online_nodes))
3024 def BuildHooksEnv(self):
3027 Cluster-Verify hooks run only in the post phase; if they fail, their
3028 output is logged in the verify output and the verification fails.
3032 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3035 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3036 for node in self.my_node_info.values())
3040 def BuildHooksNodes(self):
3041 """Build hooks nodes.
3044 return ([], self.my_node_names)
3046 def Exec(self, feedback_fn):
3047 """Verify integrity of the node group, performing various test on nodes.
3050 # This method has too many local variables. pylint: disable=R0914
3051 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3053 if not self.my_node_names:
3055 feedback_fn("* Empty node group, skipping verification")
3059 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3060 verbose = self.op.verbose
3061 self._feedback_fn = feedback_fn
3063 vg_name = self.cfg.GetVGName()
3064 drbd_helper = self.cfg.GetDRBDHelper()
3065 cluster = self.cfg.GetClusterInfo()
3066 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3067 hypervisors = cluster.enabled_hypervisors
3068 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3070 i_non_redundant = [] # Non redundant instances
3071 i_non_a_balanced = [] # Non auto-balanced instances
3072 i_offline = 0 # Count of offline instances
3073 n_offline = 0 # Count of offline nodes
3074 n_drained = 0 # Count of nodes being drained
3075 node_vol_should = {}
3077 # FIXME: verify OS list
3080 filemap = _ComputeAncillaryFiles(cluster, False)
3082 # do local checksums
3083 master_node = self.master_node = self.cfg.GetMasterNode()
3084 master_ip = self.cfg.GetMasterIP()
3086 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3089 if self.cfg.GetUseExternalMipScript():
3090 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3092 node_verify_param = {
3093 constants.NV_FILELIST:
3094 utils.UniqueSequence(filename
3095 for files in filemap
3096 for filename in files),
3097 constants.NV_NODELIST:
3098 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3099 self.all_node_info.values()),
3100 constants.NV_HYPERVISOR: hypervisors,
3101 constants.NV_HVPARAMS:
3102 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3103 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3104 for node in node_data_list
3105 if not node.offline],
3106 constants.NV_INSTANCELIST: hypervisors,
3107 constants.NV_VERSION: None,
3108 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3109 constants.NV_NODESETUP: None,
3110 constants.NV_TIME: None,
3111 constants.NV_MASTERIP: (master_node, master_ip),
3112 constants.NV_OSLIST: None,
3113 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3114 constants.NV_USERSCRIPTS: user_scripts,
3117 if vg_name is not None:
3118 node_verify_param[constants.NV_VGLIST] = None
3119 node_verify_param[constants.NV_LVLIST] = vg_name
3120 node_verify_param[constants.NV_PVLIST] = [vg_name]
3121 node_verify_param[constants.NV_DRBDLIST] = None
3124 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3127 # FIXME: this needs to be changed per node-group, not cluster-wide
3129 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3130 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3131 bridges.add(default_nicpp[constants.NIC_LINK])
3132 for instance in self.my_inst_info.values():
3133 for nic in instance.nics:
3134 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3135 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3136 bridges.add(full_nic[constants.NIC_LINK])
3139 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3141 # Build our expected cluster state
3142 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3144 vm_capable=node.vm_capable))
3145 for node in node_data_list)
3149 for node in self.all_node_info.values():
3150 path = _SupportsOob(self.cfg, node)
3151 if path and path not in oob_paths:
3152 oob_paths.append(path)
3155 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3157 for instance in self.my_inst_names:
3158 inst_config = self.my_inst_info[instance]
3159 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3162 for nname in inst_config.all_nodes:
3163 if nname not in node_image:
3164 gnode = self.NodeImage(name=nname)
3165 gnode.ghost = (nname not in self.all_node_info)
3166 node_image[nname] = gnode
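# Nodes referenced by an instance but absent from the configuration are
# tracked as "ghost" node images so later checks can flag such instances.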
3168 inst_config.MapLVsByNode(node_vol_should)
3170 pnode = inst_config.primary_node
3171 node_image[pnode].pinst.append(instance)
3173 for snode in inst_config.secondary_nodes:
3174 nimg = node_image[snode]
3175 nimg.sinst.append(instance)
3176 if pnode not in nimg.sbp:
3177 nimg.sbp[pnode] = []
3178 nimg.sbp[pnode].append(instance)
3180 # At this point, we have the in-memory data structures complete,
3181 # except for the runtime information, which we'll gather next
3183 # Due to the way our RPC system works, exact response times cannot be
3184 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3185 # time before and after executing the request, we can at least have a time
3187 nvinfo_starttime = time.time()
3188 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3190 self.cfg.GetClusterName())
3191 nvinfo_endtime = time.time()
3193 if self.extra_lv_nodes and vg_name is not None:
3195 self.rpc.call_node_verify(self.extra_lv_nodes,
3196 {constants.NV_LVLIST: vg_name},
3197 self.cfg.GetClusterName())
3199 extra_lv_nvinfo = {}
3201 all_drbd_map = self.cfg.ComputeDRBDMap()
3203 feedback_fn("* Gathering disk information (%s nodes)" %
3204 len(self.my_node_names))
3205 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3208 feedback_fn("* Verifying configuration file consistency")
3210 # If not all nodes are being checked, we need to make sure the master node
3211 # and a non-checked vm_capable node are in the list.
3212 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3214 vf_nvinfo = all_nvinfo.copy()
3215 vf_node_info = list(self.my_node_info.values())
3216 additional_nodes = []
3217 if master_node not in self.my_node_info:
3218 additional_nodes.append(master_node)
3219 vf_node_info.append(self.all_node_info[master_node])
3220 # Add the first vm_capable node we find which is not included
3221 for node in absent_nodes:
3222 nodeinfo = self.all_node_info[node]
3223 if nodeinfo.vm_capable and not nodeinfo.offline:
3224 additional_nodes.append(node)
3225 vf_node_info.append(self.all_node_info[node])
3227 key = constants.NV_FILELIST
3228 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3229 {key: node_verify_param[key]},
3230 self.cfg.GetClusterName()))
3232 vf_nvinfo = all_nvinfo
3233 vf_node_info = self.my_node_info.values()
3235 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3237 feedback_fn("* Verifying node status")
3241 for node_i in node_data_list:
3243 nimg = node_image[node]
3247 feedback_fn("* Skipping offline node %s" % (node,))
3251 if node == master_node:
3253 elif node_i.master_candidate:
3254 ntype = "master candidate"
3255 elif node_i.drained:
3261 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3263 msg = all_nvinfo[node].fail_msg
3264 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3267 nimg.rpc_fail = True
3270 nresult = all_nvinfo[node].payload
3272 nimg.call_ok = self._VerifyNode(node_i, nresult)
3273 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3274 self._VerifyNodeNetwork(node_i, nresult)
3275 self._VerifyNodeUserScripts(node_i, nresult)
3276 self._VerifyOob(node_i, nresult)
3279 self._VerifyNodeLVM(node_i, nresult, vg_name)
3280 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3283 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3284 self._UpdateNodeInstances(node_i, nresult, nimg)
3285 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3286 self._UpdateNodeOS(node_i, nresult, nimg)
3288 if not nimg.os_fail:
3289 if refos_img is None:
3291 self._VerifyNodeOS(node_i, nimg, refos_img)
3292 self._VerifyNodeBridges(node_i, nresult, bridges)
3294 # Check whether all running instances are primary for the node. (This
3295 # can no longer be done from _VerifyInstance below, since some of the
3296 # wrong instances could be from other node groups.)
3297 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3299 for inst in non_primary_inst:
3300 test = inst in self.all_inst_info
3301 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3302 "instance should not run on node %s", node_i.name)
3303 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3304 "node is running unknown instance %s", inst)
3306 for node, result in extra_lv_nvinfo.items():
3307 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3308 node_image[node], vg_name)
3310 feedback_fn("* Verifying instance status")
3311 for instance in self.my_inst_names:
3313 feedback_fn("* Verifying instance %s" % instance)
3314 inst_config = self.my_inst_info[instance]
3315 self._VerifyInstance(instance, inst_config, node_image,
3317 inst_nodes_offline = []
3319 pnode = inst_config.primary_node
3320 pnode_img = node_image[pnode]
3321 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3322 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3323 " primary node failed", instance)
3325 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3327 constants.CV_EINSTANCEBADNODE, instance,
3328 "instance is marked as running and lives on offline node %s",
3329 inst_config.primary_node)
3331 # If the instance is non-redundant we cannot survive losing its primary
3332 # node, so we are not N+1 compliant. On the other hand we have no disk
3333 # templates with more than one secondary so that situation is not well
3335 # FIXME: does not support file-backed instances
3336 if not inst_config.secondary_nodes:
3337 i_non_redundant.append(instance)
3339 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3340 constants.CV_EINSTANCELAYOUT,
3341 instance, "instance has multiple secondary nodes: %s",
3342 utils.CommaJoin(inst_config.secondary_nodes),
3343 code=self.ETYPE_WARNING)
3345 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3346 pnode = inst_config.primary_node
3347 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3348 instance_groups = {}
3350 for node in instance_nodes:
3351 instance_groups.setdefault(self.all_node_info[node].group,
3355 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3356 # Sort so that we always list the primary node first.
3357 for group, nodes in sorted(instance_groups.items(),
3358 key=lambda (_, nodes): pnode in nodes,
3361 self._ErrorIf(len(instance_groups) > 1,
3362 constants.CV_EINSTANCESPLITGROUPS,
3363 instance, "instance has primary and secondary nodes in"
3364 " different groups: %s", utils.CommaJoin(pretty_list),
3365 code=self.ETYPE_WARNING)
3367 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3368 i_non_a_balanced.append(instance)
3370 for snode in inst_config.secondary_nodes:
3371 s_img = node_image[snode]
3372 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3373 snode, "instance %s, connection to secondary node failed",
3377 inst_nodes_offline.append(snode)
3379 # warn that the instance lives on offline nodes
3380 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3381 "instance has offline secondary node(s) %s",
3382 utils.CommaJoin(inst_nodes_offline))
3383 # ... or ghost/non-vm_capable nodes
3384 for node in inst_config.all_nodes:
3385 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3386 instance, "instance lives on ghost node %s", node)
3387 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3388 instance, "instance lives on non-vm_capable node %s", node)
3390 feedback_fn("* Verifying orphan volumes")
3391 reserved = utils.FieldSet(*cluster.reserved_lvs)
3393 # We will get spurious "unknown volume" warnings if any node of this group
3394 # is secondary for an instance whose primary is in another group. To avoid
3395 # them, we find these instances and add their volumes to node_vol_should.
3396 for inst in self.all_inst_info.values():
3397 for secondary in inst.secondary_nodes:
3398 if (secondary in self.my_node_info
3399 and inst.name not in self.my_inst_info):
3400 inst.MapLVsByNode(node_vol_should)
3403 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3405 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3406 feedback_fn("* Verifying N+1 Memory redundancy")
3407 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3409 feedback_fn("* Other Notes")
3411 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3412 % len(i_non_redundant))
3414 if i_non_a_balanced:
3415 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3416 % len(i_non_a_balanced))
3419 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3422 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3425 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3429 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3430 """Analyze the post-hooks' result
3432 This method analyses the hook result, handles it, and sends some
3433 nicely-formatted feedback back to the user.
3435 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3436 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3437 @param hooks_results: the results of the multi-node hooks rpc call
3438 @param feedback_fn: function used to send feedback back to the caller
3439 @param lu_result: previous Exec result
3440 @return: the new Exec result, based on the previous result
3444 # We only really run POST phase hooks, only for non-empty groups,
3445 # and are only interested in their results
3446 if not self.my_node_names:
3449 elif phase == constants.HOOKS_PHASE_POST:
3450 # Used to change hooks' output to proper indentation
3451 feedback_fn("* Hooks Results")
3452 assert hooks_results, "invalid result from hooks"
3454 for node_name in hooks_results:
3455 res = hooks_results[node_name]
3457 test = msg and not res.offline
3458 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3459 "Communication failure in hooks execution: %s", msg)
3460 if res.offline or msg:
3461 # No need to investigate payload if node is offline or gave
3464 for script, hkr, output in res.payload:
3465 test = hkr == constants.HKR_FAIL
3466 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3467 "Script %s failed, output:", script)
3469 output = self._HOOKS_INDENT_RE.sub(" ", output)
3470 feedback_fn("%s" % output)
3476 class LUClusterVerifyDisks(NoHooksLU):
3477 """Verifies the cluster disks status.
3482 def ExpandNames(self):
3483 self.share_locks = _ShareAll()
3484 self.needed_locks = {
3485 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3488 def Exec(self, feedback_fn):
3489 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3491 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3492 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3493 for group in group_names])
3496 class LUGroupVerifyDisks(NoHooksLU):
3497 """Verifies the status of all disks in a node group.
3502 def ExpandNames(self):
3503 # Raises errors.OpPrereqError on its own if group can't be found
3504 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3506 self.share_locks = _ShareAll()
3507 self.needed_locks = {
3508 locking.LEVEL_INSTANCE: [],
3509 locking.LEVEL_NODEGROUP: [],
3510 locking.LEVEL_NODE: [],
3513 def DeclareLocks(self, level):
3514 if level == locking.LEVEL_INSTANCE:
3515 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3517 # Lock instances optimistically, needs verification once node and group
3518 # locks have been acquired
3519 self.needed_locks[locking.LEVEL_INSTANCE] = \
3520 self.cfg.GetNodeGroupInstances(self.group_uuid)
3522 elif level == locking.LEVEL_NODEGROUP:
3523 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3525 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3526 set([self.group_uuid] +
3527 # Lock all groups used by instances optimistically; this requires
3528 # going via the node before it's locked, requiring verification
3531 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3532 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3534 elif level == locking.LEVEL_NODE:
3535 # This will only lock the nodes in the group to be verified which contain
3537 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3538 self._LockInstancesNodes()
3540 # Lock all nodes in group to be verified
3541 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3542 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3543 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3545 def CheckPrereq(self):
3546 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3547 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3548 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3550 assert self.group_uuid in owned_groups
3552 # Check if locked instances are still correct
3553 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3555 # Get instance information
3556 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3558 # Check if node groups for locked instances are still correct
3559 _CheckInstancesNodeGroups(self.cfg, self.instances,
3560 owned_groups, owned_nodes, self.group_uuid)
3562 def Exec(self, feedback_fn):
3563 """Verify integrity of cluster disks.
3565 @rtype: tuple of three items
3566 @return: a tuple of (dict of node-to-node_error, list of instances
3567 which need activate-disks, dict of instance: (node, volume) for
3572 res_instances = set()
3575 nv_dict = _MapInstanceDisksToNodes([inst
3576 for inst in self.instances.values()
3577 if inst.admin_state == constants.ADMINST_UP])
3580 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3581 set(self.cfg.GetVmCapableNodeList()))
3583 node_lvs = self.rpc.call_lv_list(nodes, [])
3585 for (node, node_res) in node_lvs.items():
3586 if node_res.offline:
3589 msg = node_res.fail_msg
3591 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3592 res_nodes[node] = msg
3595 for lv_name, (_, _, lv_online) in node_res.payload.items():
3596 inst = nv_dict.pop((node, lv_name), None)
3597 if not (lv_online or inst is None):
3598 res_instances.add(inst)
3600 # any leftover items in nv_dict are missing LVs, let's arrange the data
3602 for key, inst in nv_dict.iteritems():
3603 res_missing.setdefault(inst, []).append(list(key))
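# Illustrative shape of the returned data (hypothetical names/values):
#   ({"node1": "rpc failure message"}, ["inst1"],
#    {"inst2": [["node2", "xenvg/disk0_lv"]]})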
3605 return (res_nodes, list(res_instances), res_missing)
3608 class LUClusterRepairDiskSizes(NoHooksLU):
3609 """Verifies the cluster disks sizes.
3614 def ExpandNames(self):
3615 if self.op.instances:
3616 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3617 self.needed_locks = {
3618 locking.LEVEL_NODE_RES: [],
3619 locking.LEVEL_INSTANCE: self.wanted_names,
3621 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3623 self.wanted_names = None
3624 self.needed_locks = {
3625 locking.LEVEL_NODE_RES: locking.ALL_SET,
3626 locking.LEVEL_INSTANCE: locking.ALL_SET,
3628 self.share_locks = {
3629 locking.LEVEL_NODE_RES: 1,
3630 locking.LEVEL_INSTANCE: 0,
3633 def DeclareLocks(self, level):
3634 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3635 self._LockInstancesNodes(primary_only=True, level=level)
3637 def CheckPrereq(self):
3638 """Check prerequisites.
3640 This only checks the optional instance list against the existing names.
3643 if self.wanted_names is None:
3644 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3646 self.wanted_instances = \
3647 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3649 def _EnsureChildSizes(self, disk):
3650 """Ensure children of the disk have the needed disk size.
3652 This is valid mainly for DRBD8 and fixes an issue where the
3653 children have smaller disk size.
3655 @param disk: an L{ganeti.objects.Disk} object
3658 if disk.dev_type == constants.LD_DRBD8:
3659 assert disk.children, "Empty children for DRBD8?"
3660 fchild = disk.children[0]
3661 mismatch = fchild.size < disk.size
3663 self.LogInfo("Child disk has size %d, parent %d, fixing",
3664 fchild.size, disk.size)
3665 fchild.size = disk.size
3667 # and we recurse on this child only, not on the metadev
3668 return self._EnsureChildSizes(fchild) or mismatch
3672 def Exec(self, feedback_fn):
3673 """Verify the size of cluster disks.
3676 # TODO: check child disks too
3677 # TODO: check differences in size between primary/secondary nodes
3679 for instance in self.wanted_instances:
3680 pnode = instance.primary_node
3681 if pnode not in per_node_disks:
3682 per_node_disks[pnode] = []
3683 for idx, disk in enumerate(instance.disks):
3684 per_node_disks[pnode].append((instance, idx, disk))
3686 assert not (frozenset(per_node_disks.keys()) -
3687 self.owned_locks(locking.LEVEL_NODE_RES)), \
3688 "Not owning correct locks"
3689 assert not self.owned_locks(locking.LEVEL_NODE)
3692 for node, dskl in per_node_disks.items():
3693 newl = [v[2].Copy() for v in dskl]
3695 self.cfg.SetDiskID(dsk, node)
3696 result = self.rpc.call_blockdev_getsize(node, newl)
3698 self.LogWarning("Failure in blockdev_getsize call to node"
3699 " %s, ignoring", node)
3701 if len(result.payload) != len(dskl):
3702 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3703 " result.payload=%s", node, len(dskl), result.payload)
3704 self.LogWarning("Invalid result from node %s, ignoring node results",
3707 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3709 self.LogWarning("Disk %d of instance %s did not return size"
3710 " information, ignoring", idx, instance.name)
3712 if not isinstance(size, (int, long)):
3713 self.LogWarning("Disk %d of instance %s did not return valid"
3714 " size information, ignoring", idx, instance.name)
3717 if size != disk.size:
3718 self.LogInfo("Disk %d of instance %s has mismatched size,"
3719 " correcting: recorded %d, actual %d", idx,
3720 instance.name, disk.size, size)
3722 self.cfg.Update(instance, feedback_fn)
3723 changed.append((instance.name, idx, size))
3724 if self._EnsureChildSizes(disk):
3725 self.cfg.Update(instance, feedback_fn)
3726 changed.append((instance.name, idx, disk.size))
3730 class LUClusterRename(LogicalUnit):
3731 """Rename the cluster.
3734 HPATH = "cluster-rename"
3735 HTYPE = constants.HTYPE_CLUSTER
3737 def BuildHooksEnv(self):
3742 "OP_TARGET": self.cfg.GetClusterName(),
3743 "NEW_NAME": self.op.name,
3746 def BuildHooksNodes(self):
3747 """Build hooks nodes.
3750 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3752 def CheckPrereq(self):
3753 """Verify that the passed name is a valid one.
3756 hostname = netutils.GetHostname(name=self.op.name,
3757 family=self.cfg.GetPrimaryIPFamily())
3759 new_name = hostname.name
3760 self.ip = new_ip = hostname.ip
3761 old_name = self.cfg.GetClusterName()
3762 old_ip = self.cfg.GetMasterIP()
3763 if new_name == old_name and new_ip == old_ip:
3764 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3765 " cluster has changed",
3767 if new_ip != old_ip:
3768 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3769 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3770 " reachable on the network" %
3771 new_ip, errors.ECODE_NOTUNIQUE)
3773 self.op.name = new_name
3775 def Exec(self, feedback_fn):
3776 """Rename the cluster.
3779 clustername = self.op.name
3782 # shut down the master IP
3783 master_params = self.cfg.GetMasterNetworkParameters()
3784 ems = self.cfg.GetUseExternalMipScript()
3785 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3787 result.Raise("Could not disable the master role")
3790 cluster = self.cfg.GetClusterInfo()
3791 cluster.cluster_name = clustername
3792 cluster.master_ip = new_ip
3793 self.cfg.Update(cluster, feedback_fn)
3795 # update the known hosts file
3796 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3797 node_list = self.cfg.GetOnlineNodeList()
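# the master node already has the updated known_hosts file, so distribute
# it only to the remaining online nodes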
3799 node_list.remove(master_params.name)
3802 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3804 master_params.ip = new_ip
3805 result = self.rpc.call_node_activate_master_ip(master_params.name,
3807 msg = result.fail_msg
3809 self.LogWarning("Could not re-enable the master role on"
3810 " the master, please restart manually: %s", msg)
3815 def _ValidateNetmask(cfg, netmask):
3816 """Checks if a netmask is valid.
3818 @type cfg: L{config.ConfigWriter}
3819 @param cfg: The cluster configuration
3821 @param netmask: the netmask to be verified
3822 @raise errors.OpPrereqError: if the validation fails
3825 ip_family = cfg.GetPrimaryIPFamily()
3827 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3828 except errors.ProgrammerError:
3829 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3831 if not ipcls.ValidateNetmask(netmask):
3832 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3836 class LUClusterSetParams(LogicalUnit):
3837 """Change the parameters of the cluster.
3840 HPATH = "cluster-modify"
3841 HTYPE = constants.HTYPE_CLUSTER
3844 def CheckArguments(self):
3848 if self.op.uid_pool:
3849 uidpool.CheckUidPool(self.op.uid_pool)
3851 if self.op.add_uids:
3852 uidpool.CheckUidPool(self.op.add_uids)
3854 if self.op.remove_uids:
3855 uidpool.CheckUidPool(self.op.remove_uids)
3857 if self.op.master_netmask is not None:
3858 _ValidateNetmask(self.cfg, self.op.master_netmask)
3860 if self.op.diskparams:
3861 for dt_params in self.op.diskparams.values():
3862 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3864 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3865 except errors.OpPrereqError, err:
3866 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3869 def ExpandNames(self):
3870 # FIXME: in the future maybe other cluster params won't require checking on
3871 # all nodes to be modified.
3872 self.needed_locks = {
3873 locking.LEVEL_NODE: locking.ALL_SET,
3874 locking.LEVEL_INSTANCE: locking.ALL_SET,
3875 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3877 self.share_locks = {
3878 locking.LEVEL_NODE: 1,
3879 locking.LEVEL_INSTANCE: 1,
3880 locking.LEVEL_NODEGROUP: 1,
3883 def BuildHooksEnv(self):
3888 "OP_TARGET": self.cfg.GetClusterName(),
3889 "NEW_VG_NAME": self.op.vg_name,
3892 def BuildHooksNodes(self):
3893 """Build hooks nodes.
3896 mn = self.cfg.GetMasterNode()
3899 def CheckPrereq(self):
3900 """Check prerequisites.
3902 This checks that the given parameters don't conflict and
3903 that the given volume group is valid.
3906 if self.op.vg_name is not None and not self.op.vg_name:
3907 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3908 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3909 " instances exist", errors.ECODE_INVAL)
3911 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3912 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3913 raise errors.OpPrereqError("Cannot disable drbd helper while"
3914 " drbd-based instances exist",
3917 node_list = self.owned_locks(locking.LEVEL_NODE)
3919 # if vg_name not None, checks given volume group on all nodes
3921 vglist = self.rpc.call_vg_list(node_list)
3922 for node in node_list:
3923 msg = vglist[node].fail_msg
3925 # ignoring down node
3926 self.LogWarning("Error while gathering data on node %s"
3927 " (ignoring node): %s", node, msg)
3929 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3931 constants.MIN_VG_SIZE)
3933 raise errors.OpPrereqError("Error on node '%s': %s" %
3934 (node, vgstatus), errors.ECODE_ENVIRON)
3936 if self.op.drbd_helper:
3937 # checks given drbd helper on all nodes
3938 helpers = self.rpc.call_drbd_helper(node_list)
3939 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3941 self.LogInfo("Not checking drbd helper on offline node %s", node)
3943 msg = helpers[node].fail_msg
3945 raise errors.OpPrereqError("Error checking drbd helper on node"
3946 " '%s': %s" % (node, msg),
3947 errors.ECODE_ENVIRON)
3948 node_helper = helpers[node].payload
3949 if node_helper != self.op.drbd_helper:
3950 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3951 (node, node_helper), errors.ECODE_ENVIRON)
3953 self.cluster = cluster = self.cfg.GetClusterInfo()
3954 # validate params changes
3955 if self.op.beparams:
3956 objects.UpgradeBeParams(self.op.beparams)
3957 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3958 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3960 if self.op.ndparams:
3961 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3962 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3964 # TODO: we need a more general way to handle resetting
3965 # cluster-level parameters to default values
3966 if self.new_ndparams["oob_program"] == "":
3967 self.new_ndparams["oob_program"] = \
3968 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3970 if self.op.hv_state:
3971 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3972 self.cluster.hv_state_static)
3973 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3974 for hv, values in new_hv_state.items())
3976 if self.op.disk_state:
3977 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3978 self.cluster.disk_state_static)
3979 self.new_disk_state = \
3980 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3981 for name, values in svalues.items()))
3982 for storage, svalues in new_disk_state.items())
3985 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3988 all_instances = self.cfg.GetAllInstancesInfo().values()
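# for every node group, compute which instances would newly violate the
# instance policy once the change is applied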
3990 for group in self.cfg.GetAllNodeGroupsInfo().values():
3991 instances = frozenset([inst for inst in all_instances
3992 if compat.any(node in group.members
3993 for node in inst.all_nodes)])
3994 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3995 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3997 new_ipolicy, instances)
3999 violations.update(new)
4002 self.LogWarning("After the ipolicy change the following instances"
4003 " violate them: %s",
4004 utils.CommaJoin(utils.NiceSort(violations)))
4006 if self.op.nicparams:
4007 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4008 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4009 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4012 # check all instances for consistency
4013 for instance in self.cfg.GetAllInstancesInfo().values():
4014 for nic_idx, nic in enumerate(instance.nics):
4015 params_copy = copy.deepcopy(nic.nicparams)
4016 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4018 # check parameter syntax
4020 objects.NIC.CheckParameterSyntax(params_filled)
4021 except errors.ConfigurationError, err:
4022 nic_errors.append("Instance %s, nic/%d: %s" %
4023 (instance.name, nic_idx, err))
4025 # if we're moving instances to routed, check that they have an ip
4026 target_mode = params_filled[constants.NIC_MODE]
4027 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4028 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4029 " address" % (instance.name, nic_idx))
4031 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4032 "\n".join(nic_errors))
4034 # hypervisor list/parameters
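# start from a copy of the current cluster-level hvparams and overlay the
# requested per-hypervisor changes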
4035 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4036 if self.op.hvparams:
4037 for hv_name, hv_dict in self.op.hvparams.items():
4038 if hv_name not in self.new_hvparams:
4039 self.new_hvparams[hv_name] = hv_dict
4041 self.new_hvparams[hv_name].update(hv_dict)
4043 # disk template parameters
4044 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4045 if self.op.diskparams:
4046 for dt_name, dt_params in self.op.diskparams.items():
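# new disk templates are added as-is, existing ones have their values merged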
4047 if dt_name not in self.new_diskparams:
4048 self.new_diskparams[dt_name] = dt_params
4050 self.new_diskparams[dt_name].update(dt_params)
4052 # os hypervisor parameters
4053 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4055 for os_name, hvs in self.op.os_hvp.items():
4056 if os_name not in self.new_os_hvp:
4057 self.new_os_hvp[os_name] = hvs
4059 for hv_name, hv_dict in hvs.items():
4060 if hv_name not in self.new_os_hvp[os_name]:
4061 self.new_os_hvp[os_name][hv_name] = hv_dict
4063 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4066 self.new_osp = objects.FillDict(cluster.osparams, {})
4067 if self.op.osparams:
4068 for os_name, osp in self.op.osparams.items():
4069 if os_name not in self.new_osp:
4070 self.new_osp[os_name] = {}
4072 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4075 if not self.new_osp[os_name]:
4076 # we removed all parameters
4077 del self.new_osp[os_name]
4079 # check the parameter validity (remote check)
4080 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4081 os_name, self.new_osp[os_name])
4083 # changes to the hypervisor list
4084 if self.op.enabled_hypervisors is not None:
4085 self.hv_list = self.op.enabled_hypervisors
4086 for hv in self.hv_list:
4087 # if the hypervisor doesn't already exist in the cluster
4088 # hvparams, we initialize it to empty, and then (in both
4089 # cases) we make sure to fill the defaults, as we might not
4090 # have a complete defaults list if the hypervisor wasn't enabled before
4092 if hv not in new_hvp:
4094 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4095 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4097 self.hv_list = cluster.enabled_hypervisors
4099 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4100 # either the enabled list has changed, or the parameters have, validate
4101 for hv_name, hv_params in self.new_hvparams.items():
4102 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4103 (self.op.enabled_hypervisors and
4104 hv_name in self.op.enabled_hypervisors)):
4105 # either this is a new hypervisor, or its parameters have changed
4106 hv_class = hypervisor.GetHypervisor(hv_name)
4107 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4108 hv_class.CheckParameterSyntax(hv_params)
4109 _CheckHVParams(self, node_list, hv_name, hv_params)
4112 # no need to check any newly-enabled hypervisors, since the
4113 # defaults have already been checked in the above code-block
4114 for os_name, os_hvp in self.new_os_hvp.items():
4115 for hv_name, hv_params in os_hvp.items():
4116 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4117 # we need to fill in the new os_hvp on top of the actual hv_p
4118 cluster_defaults = self.new_hvparams.get(hv_name, {})
4119 new_osp = objects.FillDict(cluster_defaults, hv_params)
4120 hv_class = hypervisor.GetHypervisor(hv_name)
4121 hv_class.CheckParameterSyntax(new_osp)
4122 _CheckHVParams(self, node_list, hv_name, new_osp)
4124 if self.op.default_iallocator:
4125 alloc_script = utils.FindFile(self.op.default_iallocator,
4126 constants.IALLOCATOR_SEARCH_PATH,
4128 if alloc_script is None:
4129 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4130 " specified" % self.op.default_iallocator,
4133 def Exec(self, feedback_fn):
4134 """Change the parameters of the cluster.
4137 if self.op.vg_name is not None:
4138 new_volume = self.op.vg_name
4141 if new_volume != self.cfg.GetVGName():
4142 self.cfg.SetVGName(new_volume)
4144 feedback_fn("Cluster LVM configuration already in desired"
4145 " state, not changing")
4146 if self.op.drbd_helper is not None:
4147 new_helper = self.op.drbd_helper
4150 if new_helper != self.cfg.GetDRBDHelper():
4151 self.cfg.SetDRBDHelper(new_helper)
4153 feedback_fn("Cluster DRBD helper already in desired state,"
4155 if self.op.hvparams:
4156 self.cluster.hvparams = self.new_hvparams
4158 self.cluster.os_hvp = self.new_os_hvp
4159 if self.op.enabled_hypervisors is not None:
4160 self.cluster.hvparams = self.new_hvparams
4161 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4162 if self.op.beparams:
4163 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4164 if self.op.nicparams:
4165 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4167 self.cluster.ipolicy = self.new_ipolicy
4168 if self.op.osparams:
4169 self.cluster.osparams = self.new_osp
4170 if self.op.ndparams:
4171 self.cluster.ndparams = self.new_ndparams
4172 if self.op.diskparams:
4173 self.cluster.diskparams = self.new_diskparams
4174 if self.op.hv_state:
4175 self.cluster.hv_state_static = self.new_hv_state
4176 if self.op.disk_state:
4177 self.cluster.disk_state_static = self.new_disk_state
4179 if self.op.candidate_pool_size is not None:
4180 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4181 # we need to update the pool size here, otherwise the save will fail
4182 _AdjustCandidatePool(self, [])
4184 if self.op.maintain_node_health is not None:
4185 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4186 feedback_fn("Note: CONFD was disabled at build time, node health"
4187 " maintenance is not useful (still enabling it)")
4188 self.cluster.maintain_node_health = self.op.maintain_node_health
4190 if self.op.prealloc_wipe_disks is not None:
4191 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4193 if self.op.add_uids is not None:
4194 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4196 if self.op.remove_uids is not None:
4197 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4199 if self.op.uid_pool is not None:
4200 self.cluster.uid_pool = self.op.uid_pool
4202 if self.op.default_iallocator is not None:
4203 self.cluster.default_iallocator = self.op.default_iallocator
4205 if self.op.reserved_lvs is not None:
4206 self.cluster.reserved_lvs = self.op.reserved_lvs
4208 if self.op.use_external_mip_script is not None:
4209 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4211 def helper_os(aname, mods, desc):
4213 lst = getattr(self.cluster, aname)
4214 for key, val in mods:
4215 if key == constants.DDM_ADD:
4217 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4220 elif key == constants.DDM_REMOVE:
4224 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4226 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4228 if self.op.hidden_os:
4229 helper_os("hidden_os", self.op.hidden_os, "hidden")
4231 if self.op.blacklisted_os:
4232 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4234 if self.op.master_netdev:
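# the master IP must be taken down on the old netdev first; it is brought
# up again on the new netdev once the configuration has been updated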
4235 master_params = self.cfg.GetMasterNetworkParameters()
4236 ems = self.cfg.GetUseExternalMipScript()
4237 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4238 self.cluster.master_netdev)
4239 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4241 result.Raise("Could not disable the master ip")
4242 feedback_fn("Changing master_netdev from %s to %s" %
4243 (master_params.netdev, self.op.master_netdev))
4244 self.cluster.master_netdev = self.op.master_netdev
4246 if self.op.master_netmask:
4247 master_params = self.cfg.GetMasterNetworkParameters()
4248 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4249 result = self.rpc.call_node_change_master_netmask(master_params.name,
4250 master_params.netmask,
4251 self.op.master_netmask,
4253 master_params.netdev)
4255 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4258 self.cluster.master_netmask = self.op.master_netmask
4260 self.cfg.Update(self.cluster, feedback_fn)
4262 if self.op.master_netdev:
4263 master_params = self.cfg.GetMasterNetworkParameters()
4264 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4265 self.op.master_netdev)
4266 ems = self.cfg.GetUseExternalMipScript()
4267 result = self.rpc.call_node_activate_master_ip(master_params.name,
4270 self.LogWarning("Could not re-enable the master ip on"
4271 " the master, please restart manually: %s",
4275 def _UploadHelper(lu, nodes, fname):
4276 """Helper for uploading a file and showing warnings.
4279 if os.path.exists(fname):
4280 result = lu.rpc.call_upload_file(nodes, fname)
4281 for to_node, to_result in result.items():
4282 msg = to_result.fail_msg
4284 msg = ("Copy of file %s to node %s failed: %s" %
4285 (fname, to_node, msg))
4286 lu.proc.LogWarning(msg)
4289 def _ComputeAncillaryFiles(cluster, redist):
4290 """Compute files external to Ganeti which need to be consistent.
4292 @type redist: boolean
4293 @param redist: Whether to include files which need to be redistributed
4296 # Compute files for all nodes
4298 constants.SSH_KNOWN_HOSTS_FILE,
4299 constants.CONFD_HMAC_KEY,
4300 constants.CLUSTER_DOMAIN_SECRET_FILE,
4301 constants.SPICE_CERT_FILE,
4302 constants.SPICE_CACERT_FILE,
4303 constants.RAPI_USERS_FILE,
4307 files_all.update(constants.ALL_CERT_FILES)
4308 files_all.update(ssconf.SimpleStore().GetFileList())
4310 # we need to ship at least the RAPI certificate
4311 files_all.add(constants.RAPI_CERT_FILE)
4313 if cluster.modify_etc_hosts:
4314 files_all.add(constants.ETC_HOSTS)
4316 if cluster.use_external_mip_script:
4317 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4319 # Files which are optional; these must:
4320 # - be present in one other category as well
4321 # - either exist or not exist on all nodes of that category (mc, vm all)
4323 constants.RAPI_USERS_FILE,
4326 # Files which should only be on master candidates
4330 files_mc.add(constants.CLUSTER_CONF_FILE)
4332 # Files which should only be on VM-capable nodes
4333 files_vm = set(filename
4334 for hv_name in cluster.enabled_hypervisors
4335 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4337 files_opt |= set(filename
4338 for hv_name in cluster.enabled_hypervisors
4339 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4341 # Filenames in each category must be unique
4342 all_files_set = files_all | files_mc | files_vm
4343 assert (len(all_files_set) ==
4344 sum(map(len, [files_all, files_mc, files_vm]))), \
4345 "Found file listed in more than one file list"
4347 # Optional files must be present in one other category
4348 assert all_files_set.issuperset(files_opt), \
4349 "Optional file not in a different required list"
4351 return (files_all, files_opt, files_mc, files_vm)
4354 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4355 """Distribute additional files which are part of the cluster configuration.
4357 ConfigWriter takes care of distributing the config and ssconf files, but
4358 there are more files which should be distributed to all nodes. This function
4359 makes sure those are copied.
4361 @param lu: calling logical unit
4362 @param additional_nodes: list of nodes not in the config to distribute to
4363 @type additional_vm: boolean
4364 @param additional_vm: whether the additional nodes are vm-capable or not
4367 # Gather target nodes
4368 cluster = lu.cfg.GetClusterInfo()
4369 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4371 online_nodes = lu.cfg.GetOnlineNodeList()
4372 online_set = frozenset(online_nodes)
4373 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4375 if additional_nodes is not None:
4376 online_nodes.extend(additional_nodes)
4378 vm_nodes.extend(additional_nodes)
4380 # Never distribute to master node
4381 for nodelist in [online_nodes, vm_nodes]:
4382 if master_info.name in nodelist:
4383 nodelist.remove(master_info.name)
4386 (files_all, _, files_mc, files_vm) = \
4387 _ComputeAncillaryFiles(cluster, True)
4389 # Never re-distribute configuration file from here
4390 assert not (constants.CLUSTER_CONF_FILE in files_all or
4391 constants.CLUSTER_CONF_FILE in files_vm)
4392 assert not files_mc, "Master candidates not handled in this function"
4395 (online_nodes, files_all),
4396 (vm_nodes, files_vm),
4400 for (node_list, files) in filemap:
4402 _UploadHelper(lu, node_list, fname)
4405 class LUClusterRedistConf(NoHooksLU):
4406 """Force the redistribution of cluster configuration.
4408 This is a very simple LU.
4413 def ExpandNames(self):
4414 self.needed_locks = {
4415 locking.LEVEL_NODE: locking.ALL_SET,
4417 self.share_locks[locking.LEVEL_NODE] = 1
4419 def Exec(self, feedback_fn):
4420 """Redistribute the configuration.
4423 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4424 _RedistributeAncillaryFiles(self)
4427 class LUClusterActivateMasterIp(NoHooksLU):
4428 """Activate the master IP on the master node.
4431 def Exec(self, feedback_fn):
4432 """Activate the master IP.
4435 master_params = self.cfg.GetMasterNetworkParameters()
4436 ems = self.cfg.GetUseExternalMipScript()
4437 result = self.rpc.call_node_activate_master_ip(master_params.name,
4439 result.Raise("Could not activate the master IP")
4442 class LUClusterDeactivateMasterIp(NoHooksLU):
4443 """Deactivate the master IP on the master node.
4446 def Exec(self, feedback_fn):
4447 """Deactivate the master IP.
4450 master_params = self.cfg.GetMasterNetworkParameters()
4451 ems = self.cfg.GetUseExternalMipScript()
4452 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4454 result.Raise("Could not deactivate the master IP")
4457 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4458 """Sleep and poll for an instance's disk to sync.
4461 if not instance.disks or disks is not None and not disks:
4464 disks = _ExpandCheckDisks(instance, disks)
4467 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4469 node = instance.primary_node
4472 lu.cfg.SetDiskID(dev, node)
4474 # TODO: Convert to utils.Retry
4477 degr_retries = 10 # in seconds, as we sleep 1 second each time
4481 cumul_degraded = False
4482 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4483 msg = rstats.fail_msg
4485 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4488 raise errors.RemoteError("Can't contact node %s for mirror data,"
4489 " aborting." % node)
4492 rstats = rstats.payload
4494 for i, mstat in enumerate(rstats):
4496 lu.LogWarning("Can't compute data for node %s/%s",
4497 node, disks[i].iv_name)
4500 cumul_degraded = (cumul_degraded or
4501 (mstat.is_degraded and mstat.sync_percent is None))
4502 if mstat.sync_percent is not None:
4504 if mstat.estimated_time is not None:
4505 rem_time = ("%s remaining (estimated)" %
4506 utils.FormatSeconds(mstat.estimated_time))
4507 max_time = mstat.estimated_time
4509 rem_time = "no time estimate"
4510 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4511 (disks[i].iv_name, mstat.sync_percent, rem_time))
4513 # if we're done but degraded, let's do a few small retries, to
4514 # make sure we see a stable and not transient situation; therefore
4515 # we force restart of the loop
4516 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4517 logging.info("Degraded disks found, %d retries left", degr_retries)
4525 time.sleep(min(60, max_time))
4528 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
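# the return value is True when no disk is left in a degraded state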
4529 return not cumul_degraded
4532 def _BlockdevFind(lu, node, dev, instance):
4533 """Wrapper around call_blockdev_find to annotate diskparams.
4535 @param lu: A reference to the lu object
4536 @param node: The node to call out
4537 @param dev: The device to find
4538 @param instance: The instance object the device belongs to
4539 @returns The result of the rpc call
4542 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4543 return lu.rpc.call_blockdev_find(node, disk)
4546 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4547 """Wrapper around L{_CheckDiskConsistencyInner}.
4550 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4551 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4555 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4557 """Check that mirrors are not degraded.
4559 @attention: The device has to be annotated already.
4561 The ldisk parameter, if True, will change the test from the
4562 is_degraded attribute (which represents overall non-ok status for
4563 the device(s)) to the ldisk (representing the local storage status).
4566 lu.cfg.SetDiskID(dev, node)
4570 if on_primary or dev.AssembleOnSecondary():
4571 rstats = lu.rpc.call_blockdev_find(node, dev)
4572 msg = rstats.fail_msg
4574 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4576 elif not rstats.payload:
4577 lu.LogWarning("Can't find disk on node %s", node)
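# with ldisk=True only the local disk status is checked, otherwise the
# overall is_degraded flag of the mirror is used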
4581 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4583 result = result and not rstats.payload.is_degraded
4586 for child in dev.children:
4587 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4593 class LUOobCommand(NoHooksLU):
4594 """Logical unit for OOB handling.
4598 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4600 def ExpandNames(self):
4601 """Gather locks we need.
4604 if self.op.node_names:
4605 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4606 lock_names = self.op.node_names
4608 lock_names = locking.ALL_SET
4610 self.needed_locks = {
4611 locking.LEVEL_NODE: lock_names,
4614 def CheckPrereq(self):
4615 """Check prerequisites.
4618 - the node exists in the configuration
4621 Any errors are signaled by raising errors.OpPrereqError.
4625 self.master_node = self.cfg.GetMasterNode()
4627 assert self.op.power_delay >= 0.0
4629 if self.op.node_names:
4630 if (self.op.command in self._SKIP_MASTER and
4631 self.master_node in self.op.node_names):
4632 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4633 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4635 if master_oob_handler:
4636 additional_text = ("run '%s %s %s' if you want to operate on the"
4637 " master regardless") % (master_oob_handler,
4641 additional_text = "it does not support out-of-band operations"
4643 raise errors.OpPrereqError(("Operating on the master node %s is not"
4644 " allowed for %s; %s") %
4645 (self.master_node, self.op.command,
4646 additional_text), errors.ECODE_INVAL)
4648 self.op.node_names = self.cfg.GetNodeList()
4649 if self.op.command in self._SKIP_MASTER:
4650 self.op.node_names.remove(self.master_node)
4652 if self.op.command in self._SKIP_MASTER:
4653 assert self.master_node not in self.op.node_names
4655 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4657 raise errors.OpPrereqError("Node %s not found" % node_name,
4660 self.nodes.append(node)
4662 if (not self.op.ignore_status and
4663 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4664 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4665 " not marked offline") % node_name,
4668 def Exec(self, feedback_fn):
4669 """Execute OOB and return result if we expect any.
4672 master_node = self.master_node
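# the out-of-band helper is always invoked on the master node, once per
# target node (optionally with a delay between power-on calls)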
4675 for idx, node in enumerate(utils.NiceSort(self.nodes,
4676 key=lambda node: node.name)):
4677 node_entry = [(constants.RS_NORMAL, node.name)]
4678 ret.append(node_entry)
4680 oob_program = _SupportsOob(self.cfg, node)
4683 node_entry.append((constants.RS_UNAVAIL, None))
4686 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4687 self.op.command, oob_program, node.name)
4688 result = self.rpc.call_run_oob(master_node, oob_program,
4689 self.op.command, node.name,
4693 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4694 node.name, result.fail_msg)
4695 node_entry.append((constants.RS_NODATA, None))
4698 self._CheckPayload(result)
4699 except errors.OpExecError, err:
4700 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4702 node_entry.append((constants.RS_NODATA, None))
4704 if self.op.command == constants.OOB_HEALTH:
4705 # For health we should log important events
4706 for item, status in result.payload:
4707 if status in [constants.OOB_STATUS_WARNING,
4708 constants.OOB_STATUS_CRITICAL]:
4709 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4710 item, node.name, status)
4712 if self.op.command == constants.OOB_POWER_ON:
4714 elif self.op.command == constants.OOB_POWER_OFF:
4715 node.powered = False
4716 elif self.op.command == constants.OOB_POWER_STATUS:
4717 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4718 if powered != node.powered:
4719 logging.warning(("Recorded power state (%s) of node '%s' does not"
4720 " match actual power state (%s)"), node.powered,
4723 # For configuration changing commands we should update the node
4724 if self.op.command in (constants.OOB_POWER_ON,
4725 constants.OOB_POWER_OFF):
4726 self.cfg.Update(node, feedback_fn)
4728 node_entry.append((constants.RS_NORMAL, result.payload))
4730 if (self.op.command == constants.OOB_POWER_ON and
4731 idx < len(self.nodes) - 1):
4732 time.sleep(self.op.power_delay)
4736 def _CheckPayload(self, result):
4737 """Checks if the payload is valid.
4739 @param result: RPC result
4740 @raises errors.OpExecError: If payload is not valid
4744 if self.op.command == constants.OOB_HEALTH:
4745 if not isinstance(result.payload, list):
4746 errs.append("command 'health' is expected to return a list but got %s" %
4747 type(result.payload))
4749 for item, status in result.payload:
4750 if status not in constants.OOB_STATUSES:
4751 errs.append("health item '%s' has invalid status '%s'" %
4754 if self.op.command == constants.OOB_POWER_STATUS:
4755 if not isinstance(result.payload, dict):
4756 errs.append("power-status is expected to return a dict but got %s" %
4757 type(result.payload))
4759 if self.op.command in [
4760 constants.OOB_POWER_ON,
4761 constants.OOB_POWER_OFF,
4762 constants.OOB_POWER_CYCLE,
4764 if result.payload is not None:
4765 errs.append("%s is expected to not return payload but got '%s'" %
4766 (self.op.command, result.payload))
4769 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4770 utils.CommaJoin(errs))
4773 class _OsQuery(_QueryBase):
4774 FIELDS = query.OS_FIELDS
4776 def ExpandNames(self, lu):
4777 # Lock all nodes in shared mode
4778 # Temporary removal of locks, should be reverted later
4779 # TODO: reintroduce locks when they are lighter-weight
4780 lu.needed_locks = {}
4781 #self.share_locks[locking.LEVEL_NODE] = 1
4782 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4784 # The following variables interact with _QueryBase._GetNames
4786 self.wanted = self.names
4788 self.wanted = locking.ALL_SET
4790 self.do_locking = self.use_locking
4792 def DeclareLocks(self, lu, level):
4796 def _DiagnoseByOS(rlist):
4797 """Remaps a per-node return list into an a per-os per-node dictionary
4799 @param rlist: a map with node names as keys and OS objects as values
4802 @return: a dictionary with osnames as keys and as value another
4803 map, with nodes as keys and tuples of (path, status, diagnose,
4804 variants, parameters, api_versions) as values, eg::
4806 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4807 (/srv/..., False, "invalid api")],
4808 "node2": [(/srv/..., True, "", [], [])]}
4813 # we build here the list of nodes that didn't fail the RPC (at RPC
4814 # level), so that nodes with a non-responding node daemon don't
4815 # make all OSes invalid
4816 good_nodes = [node_name for node_name in rlist
4817 if not rlist[node_name].fail_msg]
4818 for node_name, nr in rlist.items():
4819 if nr.fail_msg or not nr.payload:
4821 for (name, path, status, diagnose, variants,
4822 params, api_versions) in nr.payload:
4823 if name not in all_os:
4824 # build a list of nodes for this os containing empty lists
4825 # for each node in node_list
4827 for nname in good_nodes:
4828 all_os[name][nname] = []
4829 # convert params from [name, help] to (name, help)
4830 params = [tuple(v) for v in params]
4831 all_os[name][node_name].append((path, status, diagnose,
4832 variants, params, api_versions))
4835 def _GetQueryData(self, lu):
4836 """Computes the list of nodes and their attributes.
4839 # Locking is not used
4840 assert not (compat.any(lu.glm.is_owned(level)
4841 for level in locking.LEVELS
4842 if level != locking.LEVEL_CLUSTER) or
4843 self.do_locking or self.use_locking)
4845 valid_nodes = [node.name
4846 for node in lu.cfg.GetAllNodesInfo().values()
4847 if not node.offline and node.vm_capable]
4848 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4849 cluster = lu.cfg.GetClusterInfo()
4853 for (os_name, os_data) in pol.items():
4854 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4855 hidden=(os_name in cluster.hidden_os),
4856 blacklisted=(os_name in cluster.blacklisted_os))
4860 api_versions = set()
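# an OS is valid only if its first (preferred) entry is valid on every
# node; variants, parameters and API versions are reduced to the values
# common to all nodes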
4862 for idx, osl in enumerate(os_data.values()):
4863 info.valid = bool(info.valid and osl and osl[0][1])
4867 (node_variants, node_params, node_api) = osl[0][3:6]
4870 variants.update(node_variants)
4871 parameters.update(node_params)
4872 api_versions.update(node_api)
4874 # Filter out inconsistent values
4875 variants.intersection_update(node_variants)
4876 parameters.intersection_update(node_params)
4877 api_versions.intersection_update(node_api)
4879 info.variants = list(variants)
4880 info.parameters = list(parameters)
4881 info.api_versions = list(api_versions)
4883 data[os_name] = info
4885 # Prepare data in requested order
4886 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4890 class LUOsDiagnose(NoHooksLU):
4891 """Logical unit for OS diagnose/query.
4897 def _BuildFilter(fields, names):
4898 """Builds a filter for querying OSes.
4901 name_filter = qlang.MakeSimpleFilter("name", names)
4903 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4904 # respective field is not requested
4905 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4906 for fname in ["hidden", "blacklisted"]
4907 if fname not in fields]
4908 if "valid" not in fields:
4909 status_filter.append([qlang.OP_TRUE, "valid"])
4912 status_filter.insert(0, qlang.OP_AND)
4914 status_filter = None
4916 if name_filter and status_filter:
4917 return [qlang.OP_AND, name_filter, status_filter]
4921 return status_filter
4923 def CheckArguments(self):
4924 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4925 self.op.output_fields, False)
4927 def ExpandNames(self):
4928 self.oq.ExpandNames(self)
4930 def Exec(self, feedback_fn):
4931 return self.oq.OldStyleQuery(self)
4934 class LUNodeRemove(LogicalUnit):
4935 """Logical unit for removing a node.
4938 HPATH = "node-remove"
4939 HTYPE = constants.HTYPE_NODE
4941 def BuildHooksEnv(self):
4946 "OP_TARGET": self.op.node_name,
4947 "NODE_NAME": self.op.node_name,
4950 def BuildHooksNodes(self):
4951 """Build hooks nodes.
4953 This doesn't run on the target node in the pre phase as a failed
4954 node would then be impossible to remove.
4957 all_nodes = self.cfg.GetNodeList()
4959 all_nodes.remove(self.op.node_name)
4962 return (all_nodes, all_nodes)
4964 def CheckPrereq(self):
4965 """Check prerequisites.
4968 - the node exists in the configuration
4969 - it does not have primary or secondary instances
4970 - it's not the master
4972 Any errors are signaled by raising errors.OpPrereqError.
4975 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4976 node = self.cfg.GetNodeInfo(self.op.node_name)
4977 assert node is not None
4979 masternode = self.cfg.GetMasterNode()
4980 if node.name == masternode:
4981 raise errors.OpPrereqError("Node is the master node, failover to another"
4982 " node is required", errors.ECODE_INVAL)
4984 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4985 if node.name in instance.all_nodes:
4986 raise errors.OpPrereqError("Instance %s is still running on the node,"
4987 " please remove first" % instance_name,
4989 self.op.node_name = node.name
4992 def Exec(self, feedback_fn):
4993 """Removes the node from the cluster.
4997 logging.info("Stopping the node daemon and removing configs from node %s",
5000 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5002 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5005 # Promote nodes to master candidate as needed
5006 _AdjustCandidatePool(self, exceptions=[node.name])
5007 self.context.RemoveNode(node.name)
5009 # Run post hooks on the node before it's removed
5010 _RunPostHook(self, node.name)
5012 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5013 msg = result.fail_msg
5015 self.LogWarning("Errors encountered on the remote node while leaving"
5016 " the cluster: %s", msg)
5018 # Remove node from our /etc/hosts
5019 if self.cfg.GetClusterInfo().modify_etc_hosts:
5020 master_node = self.cfg.GetMasterNode()
5021 result = self.rpc.call_etc_hosts_modify(master_node,
5022 constants.ETC_HOSTS_REMOVE,
5024 result.Raise("Can't update hosts file with new host data")
5025 _RedistributeAncillaryFiles(self)
5028 class _NodeQuery(_QueryBase):
5029 FIELDS = query.NODE_FIELDS
5031 def ExpandNames(self, lu):
5032 lu.needed_locks = {}
5033 lu.share_locks = _ShareAll()
5036 self.wanted = _GetWantedNodes(lu, self.names)
5038 self.wanted = locking.ALL_SET
5040 self.do_locking = (self.use_locking and
5041 query.NQ_LIVE in self.requested_data)
5044 # If any non-static field is requested we need to lock the nodes
5045 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5047 def DeclareLocks(self, lu, level):
5050 def _GetQueryData(self, lu):
5051 """Computes the list of nodes and their attributes.
5054 all_info = lu.cfg.GetAllNodesInfo()
5056 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5058 # Gather data as requested
5059 if query.NQ_LIVE in self.requested_data:
5060 # filter out non-vm_capable nodes
5061 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5063 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5064 [lu.cfg.GetHypervisorType()])
5065 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5066 for (name, nresult) in node_data.items()
5067 if not nresult.fail_msg and nresult.payload)
5071 if query.NQ_INST in self.requested_data:
5072 node_to_primary = dict([(name, set()) for name in nodenames])
5073 node_to_secondary = dict([(name, set()) for name in nodenames])
5075 inst_data = lu.cfg.GetAllInstancesInfo()
5077 for inst in inst_data.values():
5078 if inst.primary_node in node_to_primary:
5079 node_to_primary[inst.primary_node].add(inst.name)
5080 for secnode in inst.secondary_nodes:
5081 if secnode in node_to_secondary:
5082 node_to_secondary[secnode].add(inst.name)
5084 node_to_primary = None
5085 node_to_secondary = None
5087 if query.NQ_OOB in self.requested_data:
5088 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5089 for name, node in all_info.iteritems())
5093 if query.NQ_GROUP in self.requested_data:
5094 groups = lu.cfg.GetAllNodeGroupsInfo()
5098 return query.NodeQueryData([all_info[name] for name in nodenames],
5099 live_data, lu.cfg.GetMasterNode(),
5100 node_to_primary, node_to_secondary, groups,
5101 oob_support, lu.cfg.GetClusterInfo())
5104 class LUNodeQuery(NoHooksLU):
5105 """Logical unit for querying nodes.
5108 # pylint: disable=W0142
5111 def CheckArguments(self):
5112 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5113 self.op.output_fields, self.op.use_locking)
5115 def ExpandNames(self):
5116 self.nq.ExpandNames(self)
5118 def DeclareLocks(self, level):
5119 self.nq.DeclareLocks(self, level)
5121 def Exec(self, feedback_fn):
5122 return self.nq.OldStyleQuery(self)
5125 class LUNodeQueryvols(NoHooksLU):
5126 """Logical unit for getting volumes on node(s).
5130 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5131 _FIELDS_STATIC = utils.FieldSet("node")
5133 def CheckArguments(self):
5134 _CheckOutputFields(static=self._FIELDS_STATIC,
5135 dynamic=self._FIELDS_DYNAMIC,
5136 selected=self.op.output_fields)
5138 def ExpandNames(self):
5139 self.share_locks = _ShareAll()
5140 self.needed_locks = {}
5142 if not self.op.nodes:
5143 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5145 self.needed_locks[locking.LEVEL_NODE] = \
5146 _GetWantedNodes(self, self.op.nodes)
5148 def Exec(self, feedback_fn):
5149 """Computes the list of nodes and their attributes.
5152 nodenames = self.owned_locks(locking.LEVEL_NODE)
5153 volumes = self.rpc.call_node_volumes(nodenames)
5155 ilist = self.cfg.GetAllInstancesInfo()
5156 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5159 for node in nodenames:
5160 nresult = volumes[node]
5163 msg = nresult.fail_msg
5165 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5168 node_vols = sorted(nresult.payload,
5169 key=operator.itemgetter("dev"))
5171 for vol in node_vols:
5173 for field in self.op.output_fields:
5176 elif field == "phys":
5180 elif field == "name":
5182 elif field == "size":
5183 val = int(float(vol["size"]))
5184 elif field == "instance":
5185 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5187 raise errors.ParameterError(field)
5188 node_output.append(str(val))
5190 output.append(node_output)
5195 class LUNodeQueryStorage(NoHooksLU):
5196 """Logical unit for getting information on storage units on node(s).
5199 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5202 def CheckArguments(self):
5203 _CheckOutputFields(static=self._FIELDS_STATIC,
5204 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5205 selected=self.op.output_fields)
5207 def ExpandNames(self):
5208 self.share_locks = _ShareAll()
5209 self.needed_locks = {}
5212 self.needed_locks[locking.LEVEL_NODE] = \
5213 _GetWantedNodes(self, self.op.nodes)
5215 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5217 def Exec(self, feedback_fn):
5218 """Computes the list of nodes and their attributes.
5221 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5223 # Always get name to sort by
5224 if constants.SF_NAME in self.op.output_fields:
5225 fields = self.op.output_fields[:]
5227 fields = [constants.SF_NAME] + self.op.output_fields
5229 # Never ask for node or type as it's only known to the LU
5230 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5231 while extra in fields:
5232 fields.remove(extra)
5234 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5235 name_idx = field_idx[constants.SF_NAME]
5237 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5238 data = self.rpc.call_storage_list(self.nodes,
5239 self.op.storage_type, st_args,
5240 self.op.name, fields)
5244 for node in utils.NiceSort(self.nodes):
5245 nresult = data[node]
5249 msg = nresult.fail_msg
5251 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5254 rows = dict([(row[name_idx], row) for row in nresult.payload])
5256 for name in utils.NiceSort(rows.keys()):
5261 for field in self.op.output_fields:
5262 if field == constants.SF_NODE:
5264 elif field == constants.SF_TYPE:
5265 val = self.op.storage_type
5266 elif field in field_idx:
5267 val = row[field_idx[field]]
5269 raise errors.ParameterError(field)
5278 class _InstanceQuery(_QueryBase):
5279 FIELDS = query.INSTANCE_FIELDS
5281 def ExpandNames(self, lu):
5282 lu.needed_locks = {}
5283 lu.share_locks = _ShareAll()
5286 self.wanted = _GetWantedInstances(lu, self.names)
5288 self.wanted = locking.ALL_SET
5290 self.do_locking = (self.use_locking and
5291 query.IQ_LIVE in self.requested_data)
5293 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5294 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5295 lu.needed_locks[locking.LEVEL_NODE] = []
5296 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5298 self.do_grouplocks = (self.do_locking and
5299 query.IQ_NODES in self.requested_data)
5301 def DeclareLocks(self, lu, level):
5303 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5304 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5306 # Lock all groups used by instances optimistically; this requires going
5307 # via the node before it's locked, requiring verification later on
5308 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5310 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5311 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5312 elif level == locking.LEVEL_NODE:
5313 lu._LockInstancesNodes() # pylint: disable=W0212
5316 def _CheckGroupLocks(lu):
5317 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5318 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5320 # Check if node groups for locked instances are still correct
5321 for instance_name in owned_instances:
5322 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5324 def _GetQueryData(self, lu):
5325 """Computes the list of instances and their attributes.
5328 if self.do_grouplocks:
5329 self._CheckGroupLocks(lu)
5331 cluster = lu.cfg.GetClusterInfo()
5332 all_info = lu.cfg.GetAllInstancesInfo()
5334 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5336 instance_list = [all_info[name] for name in instance_names]
5337 nodes = frozenset(itertools.chain(*(inst.all_nodes
5338 for inst in instance_list)))
5339 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5342 wrongnode_inst = set()
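# instances reported as running on a node other than their configured
# primary node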
5344 # Gather data as requested
5345 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5347 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5349 result = node_data[name]
5351 # offline nodes will be in both lists
5352 assert result.fail_msg
5353 offline_nodes.append(name)
5355 bad_nodes.append(name)
5356 elif result.payload:
5357 for inst in result.payload:
5358 if inst in all_info:
5359 if all_info[inst].primary_node == name:
5360 live_data.update(result.payload)
5362 wrongnode_inst.add(inst)
5364 # orphan instance; we don't list it here as we don't
5365 # handle this case yet in the output of instance listing
5366 logging.warning("Orphan instance '%s' found on node %s",
5368 # else no instance is alive
5372 if query.IQ_DISKUSAGE in self.requested_data:
5373 disk_usage = dict((inst.name,
5374 _ComputeDiskSize(inst.disk_template,
5375 [{constants.IDISK_SIZE: disk.size}
5376 for disk in inst.disks]))
5377 for inst in instance_list)
5381 if query.IQ_CONSOLE in self.requested_data:
5383 for inst in instance_list:
5384 if inst.name in live_data:
5385 # Instance is running
5386 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5388 consinfo[inst.name] = None
5389 assert set(consinfo.keys()) == set(instance_names)
5393 if query.IQ_NODES in self.requested_data:
5394 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5396 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5397 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5398 for uuid in set(map(operator.attrgetter("group"),
5404 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5405 disk_usage, offline_nodes, bad_nodes,
5406 live_data, wrongnode_inst, consinfo,
5410 class LUQuery(NoHooksLU):
5411 """Query for resources/items of a certain kind.
5414 # pylint: disable=W0142
5417 def CheckArguments(self):
5418 qcls = _GetQueryImplementation(self.op.what)
5420 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5422 def ExpandNames(self):
5423 self.impl.ExpandNames(self)
5425 def DeclareLocks(self, level):
5426 self.impl.DeclareLocks(self, level)
5428 def Exec(self, feedback_fn):
5429 return self.impl.NewStyleQuery(self)
5432 class LUQueryFields(NoHooksLU):
5433 """Query for resources/items of a certain kind.
5436 # pylint: disable=W0142
5439 def CheckArguments(self):
5440 self.qcls = _GetQueryImplementation(self.op.what)
5442 def ExpandNames(self):
5443 self.needed_locks = {}
5445 def Exec(self, feedback_fn):
5446 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5449 class LUNodeModifyStorage(NoHooksLU):
5450 """Logical unit for modifying a storage volume on a node.
5455 def CheckArguments(self):
5456 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5458 storage_type = self.op.storage_type
5461 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5463 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5464 " modified" % storage_type,
5467 diff = set(self.op.changes.keys()) - modifiable
5469 raise errors.OpPrereqError("The following fields can not be modified for"
5470 " storage units of type '%s': %r" %
5471 (storage_type, list(diff)),
5474 def ExpandNames(self):
5475 self.needed_locks = {
5476 locking.LEVEL_NODE: self.op.node_name,
5479 def Exec(self, feedback_fn):
5480 """Computes the list of nodes and their attributes.
5483 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5484 result = self.rpc.call_storage_modify(self.op.node_name,
5485 self.op.storage_type, st_args,
5486 self.op.name, self.op.changes)
5487 result.Raise("Failed to modify storage unit '%s' on %s" %
5488 (self.op.name, self.op.node_name))
5491 class LUNodeAdd(LogicalUnit):
5492 """Logical unit for adding node to the cluster.
5496 HTYPE = constants.HTYPE_NODE
5497 _NFLAGS = ["master_capable", "vm_capable"]
5499 def CheckArguments(self):
5500 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5501 # validate/normalize the node name
5502 self.hostname = netutils.GetHostname(name=self.op.node_name,
5503 family=self.primary_ip_family)
5504 self.op.node_name = self.hostname.name
5506 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5507 raise errors.OpPrereqError("Cannot readd the master node",
5510 if self.op.readd and self.op.group:
5511 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5512 " being readded", errors.ECODE_INVAL)
5514 def BuildHooksEnv(self):
5517 This will run on all nodes before, and on all nodes + the new node after.
5521 "OP_TARGET": self.op.node_name,
5522 "NODE_NAME": self.op.node_name,
5523 "NODE_PIP": self.op.primary_ip,
5524 "NODE_SIP": self.op.secondary_ip,
5525 "MASTER_CAPABLE": str(self.op.master_capable),
5526 "VM_CAPABLE": str(self.op.vm_capable),
5529 def BuildHooksNodes(self):
5530 """Build hooks nodes.
5533 # Exclude added node
5534 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5535 post_nodes = pre_nodes + [self.op.node_name, ]
5537 return (pre_nodes, post_nodes)
5539 def CheckPrereq(self):
5540 """Check prerequisites.
5543 - the new node is not already in the config
5545 - its parameters (single/dual homed) matches the cluster
5547 Any errors are signaled by raising errors.OpPrereqError.
5551 hostname = self.hostname
5552 node = hostname.name
5553 primary_ip = self.op.primary_ip = hostname.ip
5554 if self.op.secondary_ip is None:
5555 if self.primary_ip_family == netutils.IP6Address.family:
5556 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5557 " IPv4 address must be given as secondary",
5559 self.op.secondary_ip = primary_ip
5561 secondary_ip = self.op.secondary_ip
5562 if not netutils.IP4Address.IsValid(secondary_ip):
5563 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5564 " address" % secondary_ip, errors.ECODE_INVAL)
5566 node_list = cfg.GetNodeList()
5567 if not self.op.readd and node in node_list:
5568 raise errors.OpPrereqError("Node %s is already in the configuration" %
5569 node, errors.ECODE_EXISTS)
5570 elif self.op.readd and node not in node_list:
5571 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5574 self.changed_primary_ip = False
5576 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5577 if self.op.readd and node == existing_node_name:
5578 if existing_node.secondary_ip != secondary_ip:
5579 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5580 " address configuration as before",
5582 if existing_node.primary_ip != primary_ip:
5583 self.changed_primary_ip = True
5587 if (existing_node.primary_ip == primary_ip or
5588 existing_node.secondary_ip == primary_ip or
5589 existing_node.primary_ip == secondary_ip or
5590 existing_node.secondary_ip == secondary_ip):
5591 raise errors.OpPrereqError("New node ip address(es) conflict with"
5592 " existing node %s" % existing_node.name,
5593 errors.ECODE_NOTUNIQUE)
5595 # After this 'if' block, None is no longer a valid value for the
5596 # _capable op attributes
5598 old_node = self.cfg.GetNodeInfo(node)
5599 assert old_node is not None, "Can't retrieve locked node %s" % node
5600 for attr in self._NFLAGS:
5601 if getattr(self.op, attr) is None:
5602 setattr(self.op, attr, getattr(old_node, attr))
5604 for attr in self._NFLAGS:
5605 if getattr(self.op, attr) is None:
5606 setattr(self.op, attr, True)
5608 if self.op.readd and not self.op.vm_capable:
5609 pri, sec = cfg.GetNodeInstances(node)
5611 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5612 " flag set to false, but it already holds"
5613 " instances" % node,
5616 # check that the type of the node (single versus dual homed) is the
5617 # same as for the master
5618 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5619 master_singlehomed = myself.secondary_ip == myself.primary_ip
5620 newbie_singlehomed = secondary_ip == primary_ip
5621 if master_singlehomed != newbie_singlehomed:
5622 if master_singlehomed:
5623 raise errors.OpPrereqError("The master has no secondary ip but the"
5624 " new node has one",
5627 raise errors.OpPrereqError("The master has a secondary ip but the"
5628 " new node doesn't have one",
5631 # checks reachability
5632 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5633 raise errors.OpPrereqError("Node not reachable by ping",
5634 errors.ECODE_ENVIRON)
5636 if not newbie_singlehomed:
5637 # check reachability from my secondary ip to newbie's secondary ip
5638 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5639 source=myself.secondary_ip):
5640 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5641 " based ping to node daemon port",
5642 errors.ECODE_ENVIRON)
5649 if self.op.master_capable:
5650 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5652 self.master_candidate = False
5655 self.new_node = old_node
5657 node_group = cfg.LookupNodeGroup(self.op.group)
5658 self.new_node = objects.Node(name=node,
5659 primary_ip=primary_ip,
5660 secondary_ip=secondary_ip,
5661 master_candidate=self.master_candidate,
5662 offline=False, drained=False,
5665 if self.op.ndparams:
5666 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5668 if self.op.hv_state:
5669 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5671 if self.op.disk_state:
5672 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5674 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5675 # it a property on the base class.
5676 result = rpc.DnsOnlyRunner().call_version([node])[node]
5677 result.Raise("Can't get version information from node %s" % node)
5678 if constants.PROTOCOL_VERSION == result.payload:
5679 logging.info("Communication to node %s fine, sw version %s match",
5680 node, result.payload)
5681 else:
5682 raise errors.OpPrereqError("Version mismatch master version %s,"
5683 " node version %s" %
5684 (constants.PROTOCOL_VERSION, result.payload),
5685 errors.ECODE_ENVIRON)
5687 def Exec(self, feedback_fn):
5688 """Adds the new node to the cluster.
5691 new_node = self.new_node
5692 node = new_node.name
5694 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5697 # We are adding a new node, so we assume it is powered
5698 new_node.powered = True
5700 # for re-adds, reset the offline/drained/master-candidate flags;
5701 # we need to reset here, otherwise offline would prevent RPC calls
5702 # later in the procedure; this also means that if the re-add
5703 # fails, we are left with a non-offlined, broken node
5704 if self.op.readd:
5705 new_node.drained = new_node.offline = False # pylint: disable=W0201
5706 self.LogInfo("Readding a node, the offline/drained flags were reset")
5707 # if we demote the node, we do cleanup later in the procedure
5708 new_node.master_candidate = self.master_candidate
5709 if self.changed_primary_ip:
5710 new_node.primary_ip = self.op.primary_ip
5712 # copy the master/vm_capable flags
5713 for attr in self._NFLAGS:
5714 setattr(new_node, attr, getattr(self.op, attr))
5716 # notify the user about any possible mc promotion
5717 if new_node.master_candidate:
5718 self.LogInfo("Node will be a master candidate")
5720 if self.op.ndparams:
5721 new_node.ndparams = self.op.ndparams
5722 else:
5723 new_node.ndparams = {}
5725 if self.op.hv_state:
5726 new_node.hv_state_static = self.new_hv_state
5728 if self.op.disk_state:
5729 new_node.disk_state_static = self.new_disk_state
5731 # Add node to our /etc/hosts, and add key to known_hosts
5732 if self.cfg.GetClusterInfo().modify_etc_hosts:
5733 master_node = self.cfg.GetMasterNode()
5734 result = self.rpc.call_etc_hosts_modify(master_node,
5735 constants.ETC_HOSTS_ADD,
5738 result.Raise("Can't update hosts file with new host data")
5740 if new_node.secondary_ip != new_node.primary_ip:
5741 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5744 node_verify_list = [self.cfg.GetMasterNode()]
5745 node_verify_param = {
5746 constants.NV_NODELIST: ([node], {}),
5747 # TODO: do a node-net-test as well?
5750 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5751 self.cfg.GetClusterName())
5752 for verifier in node_verify_list:
5753 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5754 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5755 if nl_payload:
5756 for failed in nl_payload:
5757 feedback_fn("ssh/hostname verification failed"
5758 " (checking from %s): %s" %
5759 (verifier, nl_payload[failed]))
5760 raise errors.OpExecError("ssh/hostname verification failed")
5762 if self.op.readd:
5763 _RedistributeAncillaryFiles(self)
5764 self.context.ReaddNode(new_node)
5765 # make sure we redistribute the config
5766 self.cfg.Update(new_node, feedback_fn)
5767 # and make sure the new node will not have old files around
5768 if not new_node.master_candidate:
5769 result = self.rpc.call_node_demote_from_mc(new_node.name)
5770 msg = result.fail_msg
5771 if msg:
5772 self.LogWarning("Node failed to demote itself from master"
5773 " candidate status: %s" % msg)
5774 else:
5775 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5776 additional_vm=self.op.vm_capable)
5777 self.context.AddNode(new_node, self.proc.GetECId())
5780 class LUNodeSetParams(LogicalUnit):
5781 """Modifies the parameters of a node.
5783 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5784 to the node role (as _ROLE_*)
5785 @cvar _R2F: a dictionary from node role to tuples of flags
5786 @cvar _FLAGS: a list of attribute names corresponding to the flags
5789 HPATH = "node-modify"
5790 HTYPE = constants.HTYPE_NODE
5792 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5793 _F2R = {
5794 (True, False, False): _ROLE_CANDIDATE,
5795 (False, True, False): _ROLE_DRAINED,
5796 (False, False, True): _ROLE_OFFLINE,
5797 (False, False, False): _ROLE_REGULAR,
5798 }
5799 _R2F = dict((v, k) for k, v in _F2R.items())
5800 _FLAGS = ["master_candidate", "drained", "offline"]
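# Illustrative mapping (derived from _F2R above): a node with flags
# (master_candidate=True, drained=False, offline=False) has role
# _ROLE_CANDIDATE, and _R2F[_ROLE_OFFLINE] == (False, False, True).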
5802 def CheckArguments(self):
5803 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5804 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5805 self.op.master_capable, self.op.vm_capable,
5806 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5808 if all_mods.count(None) == len(all_mods):
5809 raise errors.OpPrereqError("Please pass at least one modification",
5811 if all_mods.count(True) > 1:
5812 raise errors.OpPrereqError("Can't set the node into more than one"
5813 " state at the same time",
5816 # Boolean value that tells us whether we might be demoting from MC
5817 self.might_demote = (self.op.master_candidate == False or
5818 self.op.offline == True or
5819 self.op.drained == True or
5820 self.op.master_capable == False)
5822 if self.op.secondary_ip:
5823 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5824 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5825 " address" % self.op.secondary_ip,
5828 self.lock_all = self.op.auto_promote and self.might_demote
5829 self.lock_instances = self.op.secondary_ip is not None
5831 def _InstanceFilter(self, instance):
5832 """Filter for getting affected instances.
5835 return (instance.disk_template in constants.DTS_INT_MIRROR and
5836 self.op.node_name in instance.all_nodes)
5838 def ExpandNames(self):
5840 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5842 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5844 # Since modifying a node can have severe effects on currently running
5845 # operations, the resource lock is at least acquired in shared mode
5846 self.needed_locks[locking.LEVEL_NODE_RES] = \
5847 self.needed_locks[locking.LEVEL_NODE]
5849 # Get node resource and instance locks in shared mode; they are not used
5850 # for anything but read-only access
5851 self.share_locks[locking.LEVEL_NODE_RES] = 1
5852 self.share_locks[locking.LEVEL_INSTANCE] = 1
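# Instance locks are only needed when the secondary IP changes; in that
# case _InstanceFilter below selects the potentially affected instances.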
5854 if self.lock_instances:
5855 self.needed_locks[locking.LEVEL_INSTANCE] = \
5856 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5858 def BuildHooksEnv(self):
5861 This runs on the master node.
5865 "OP_TARGET": self.op.node_name,
5866 "MASTER_CANDIDATE": str(self.op.master_candidate),
5867 "OFFLINE": str(self.op.offline),
5868 "DRAINED": str(self.op.drained),
5869 "MASTER_CAPABLE": str(self.op.master_capable),
5870 "VM_CAPABLE": str(self.op.vm_capable),
5873 def BuildHooksNodes(self):
5874 """Build hooks nodes.
5877 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5880 def CheckPrereq(self):
5881 """Check prerequisites.
5883 This only checks the instance list against the existing names.
5886 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5888 if self.lock_instances:
5889 affected_instances = \
5890 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5892 # Verify instance locks
5893 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5894 wanted_instances = frozenset(affected_instances.keys())
5895 if wanted_instances - owned_instances:
5896 raise errors.OpPrereqError("Instances affected by changing node %s's"
5897 " secondary IP address have changed since"
5898 " locks were acquired, wanted '%s', have"
5899 " '%s'; retry the operation" %
5901 utils.CommaJoin(wanted_instances),
5902 utils.CommaJoin(owned_instances)),
5905 affected_instances = None
5907 if (self.op.master_candidate is not None or
5908 self.op.drained is not None or
5909 self.op.offline is not None):
5910 # we can't change the master's node flags
5911 if self.op.node_name == self.cfg.GetMasterNode():
5912 raise errors.OpPrereqError("The master role can be changed"
5913 " only via master-failover",
5916 if self.op.master_candidate and not node.master_capable:
5917 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5918 " it a master candidate" % node.name,
5921 if self.op.vm_capable == False:
5922 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5924 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5925 " the vm_capable flag" % node.name,
5928 if node.master_candidate and self.might_demote and not self.lock_all:
5929 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5930 # check if, after removing the current node, we're missing master candidates
5932 (mc_remaining, mc_should, _) = \
5933 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5934 if mc_remaining < mc_should:
5935 raise errors.OpPrereqError("Not enough master candidates, please"
5936 " pass auto promote option to allow"
5937 " promotion (--auto-promote or RAPI"
5938 " auto_promote=True)", errors.ECODE_STATE)
5940 self.old_flags = old_flags = (node.master_candidate,
5941 node.drained, node.offline)
5942 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5943 self.old_role = old_role = self._F2R[old_flags]
5945 # Check for ineffective changes
5946 for attr in self._FLAGS:
5947 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5948 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5949 setattr(self.op, attr, None)
5951 # Past this point, any flag change to False means a transition
5952 # away from the respective state, as only real changes are kept
5954 # TODO: We might query the real power state if it supports OOB
5955 if _SupportsOob(self.cfg, node):
5956 if self.op.offline is False and not (node.powered or
5957 self.op.powered == True):
5958 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5959 " offline status can be reset") %
5961 elif self.op.powered is not None:
5962 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5963 " as it does not support out-of-band"
5964 " handling") % self.op.node_name)
5966 # If we're being de-offlined or un-drained, we'll promote ourselves to master candidate if needed
5967 if (self.op.drained == False or self.op.offline == False or
5968 (self.op.master_capable and not node.master_capable)):
5969 if _DecideSelfPromotion(self):
5970 self.op.master_candidate = True
5971 self.LogInfo("Auto-promoting node to master candidate")
5973 # If we're no longer master capable, we'll demote ourselves from MC
5974 if self.op.master_capable == False and node.master_candidate:
5975 self.LogInfo("Demoting from master candidate")
5976 self.op.master_candidate = False
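# Compute the new role; CheckArguments guarantees that at most one of the
# role flags is being set to True at a time.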
5979 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5980 if self.op.master_candidate:
5981 new_role = self._ROLE_CANDIDATE
5982 elif self.op.drained:
5983 new_role = self._ROLE_DRAINED
5984 elif self.op.offline:
5985 new_role = self._ROLE_OFFLINE
5986 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5987 # False is still in the new flags, which means we're un-setting (the current) flag; fall back to the regular role
5989 new_role = self._ROLE_REGULAR
5990 else: # no new flags, nothing, keep old role
5991 new_role = old_role
5993 self.new_role = new_role
5995 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5996 # Trying to transition out of offline status
5997 result = self.rpc.call_version([node.name])[node.name]
5998 if result.fail_msg:
5999 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6000 " to report its version: %s" %
6001 (node.name, result.fail_msg),
6004 self.LogWarning("Transitioning node from offline to online state"
6005 " without using re-add. Please make sure the node"
6008 # When changing the secondary ip, verify if this is a single-homed to
6009 # multi-homed transition or vice versa, and apply the relevant checks
6011 if self.op.secondary_ip:
6012 # Ok even without locking, because this can't be changed by any LU
6013 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6014 master_singlehomed = master.secondary_ip == master.primary_ip
6015 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6016 if self.op.force and node.name == master.name:
6017 self.LogWarning("Transitioning from single-homed to multi-homed"
6018 " cluster. All nodes will require a secondary ip.")
6020 raise errors.OpPrereqError("Changing the secondary ip on a"
6021 " single-homed cluster requires the"
6022 " --force option to be passed, and the"
6023 " target node to be the master",
6025 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6026 if self.op.force and node.name == master.name:
6027 self.LogWarning("Transitioning from multi-homed to single-homed"
6028 " cluster. Secondary IPs will have to be removed.")
6030 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6031 " same as the primary IP on a multi-homed"
6032 " cluster, unless the --force option is"
6033 " passed, and the target node is the"
6034 " master", errors.ECODE_INVAL)
6036 assert not (frozenset(affected_instances) -
6037 self.owned_locks(locking.LEVEL_INSTANCE))
6040 if affected_instances:
6041 raise errors.OpPrereqError("Cannot change secondary IP address:"
6042 " offline node has instances (%s)"
6043 " configured to use it" %
6044 utils.CommaJoin(affected_instances.keys()))
6046 # On online nodes, check that no instances are running, and that
6047 # the node has the new ip and we can reach it.
6048 for instance in affected_instances.values():
6049 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6050 msg="cannot change secondary ip")
6052 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6053 if master.name != node.name:
6054 # check reachability from master secondary ip to new secondary ip
6055 if not netutils.TcpPing(self.op.secondary_ip,
6056 constants.DEFAULT_NODED_PORT,
6057 source=master.secondary_ip):
6058 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6059 " based ping to node daemon port",
6060 errors.ECODE_ENVIRON)
6062 if self.op.ndparams:
6063 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6064 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6065 self.new_ndparams = new_ndparams
6067 if self.op.hv_state:
6068 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6069 self.node.hv_state_static)
6071 if self.op.disk_state:
6072 self.new_disk_state = \
6073 _MergeAndVerifyDiskState(self.op.disk_state,
6074 self.node.disk_state_static)
6076 def Exec(self, feedback_fn):
6081 old_role = self.old_role
6082 new_role = self.new_role
6086 if self.op.ndparams:
6087 node.ndparams = self.new_ndparams
6089 if self.op.powered is not None:
6090 node.powered = self.op.powered
6092 if self.op.hv_state:
6093 node.hv_state_static = self.new_hv_state
6095 if self.op.disk_state:
6096 node.disk_state_static = self.new_disk_state
6098 for attr in ["master_capable", "vm_capable"]:
6099 val = getattr(self.op, attr)
6100 if val is not None:
6101 setattr(node, attr, val)
6102 result.append((attr, str(val)))
6104 if new_role != old_role:
6105 # Tell the node to demote itself, if no longer MC and not offline
6106 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6107 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6108 if msg:
6109 self.LogWarning("Node failed to demote itself: %s", msg)
6111 new_flags = self._R2F[new_role]
6112 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6113 if of != nf:
6114 result.append((desc, str(nf)))
6115 (node.master_candidate, node.drained, node.offline) = new_flags
6117 # we locked all nodes, so we adjust the candidate pool before updating this node
6119 _AdjustCandidatePool(self, [node.name])
6121 if self.op.secondary_ip:
6122 node.secondary_ip = self.op.secondary_ip
6123 result.append(("secondary_ip", self.op.secondary_ip))
6125 # this will trigger configuration file update, if needed
6126 self.cfg.Update(node, feedback_fn)
6128 # this will trigger job queue propagation or cleanup if the mc flag changed
6130 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6131 self.context.ReaddNode(node)
6136 class LUNodePowercycle(NoHooksLU):
6137 """Powercycles a node.
6142 def CheckArguments(self):
6143 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6144 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6145 raise errors.OpPrereqError("The node is the master and the force"
6146 " parameter was not set",
6149 def ExpandNames(self):
6150 """Locking for PowercycleNode.
6152 This is a last-resort option and shouldn't block on other
6153 jobs. Therefore, we grab no locks.
6156 self.needed_locks = {}
6158 def Exec(self, feedback_fn):
6162 result = self.rpc.call_node_powercycle(self.op.node_name,
6163 self.cfg.GetHypervisorType())
6164 result.Raise("Failed to schedule the reboot")
6165 return result.payload
6168 class LUClusterQuery(NoHooksLU):
6169 """Query cluster configuration.
6174 def ExpandNames(self):
6175 self.needed_locks = {}
6177 def Exec(self, feedback_fn):
6178 """Return cluster config.
6181 cluster = self.cfg.GetClusterInfo()
6182 os_hvp = {}
6184 # Filter just for enabled hypervisors
6185 for os_name, hv_dict in cluster.os_hvp.items():
6186 os_hvp[os_name] = {}
6187 for hv_name, hv_params in hv_dict.items():
6188 if hv_name in cluster.enabled_hypervisors:
6189 os_hvp[os_name][hv_name] = hv_params
6191 # Convert ip_family to ip_version
6192 primary_ip_version = constants.IP4_VERSION
6193 if cluster.primary_ip_family == netutils.IP6Address.family:
6194 primary_ip_version = constants.IP6_VERSION
6197 "software_version": constants.RELEASE_VERSION,
6198 "protocol_version": constants.PROTOCOL_VERSION,
6199 "config_version": constants.CONFIG_VERSION,
6200 "os_api_version": max(constants.OS_API_VERSIONS),
6201 "export_version": constants.EXPORT_VERSION,
6202 "architecture": runtime.GetArchInfo(),
6203 "name": cluster.cluster_name,
6204 "master": cluster.master_node,
6205 "default_hypervisor": cluster.primary_hypervisor,
6206 "enabled_hypervisors": cluster.enabled_hypervisors,
6207 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6208 for hypervisor_name in cluster.enabled_hypervisors]),
6210 "beparams": cluster.beparams,
6211 "osparams": cluster.osparams,
6212 "ipolicy": cluster.ipolicy,
6213 "nicparams": cluster.nicparams,
6214 "ndparams": cluster.ndparams,
6215 "diskparams": cluster.diskparams,
6216 "candidate_pool_size": cluster.candidate_pool_size,
6217 "master_netdev": cluster.master_netdev,
6218 "master_netmask": cluster.master_netmask,
6219 "use_external_mip_script": cluster.use_external_mip_script,
6220 "volume_group_name": cluster.volume_group_name,
6221 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6222 "file_storage_dir": cluster.file_storage_dir,
6223 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6224 "maintain_node_health": cluster.maintain_node_health,
6225 "ctime": cluster.ctime,
6226 "mtime": cluster.mtime,
6227 "uuid": cluster.uuid,
6228 "tags": list(cluster.GetTags()),
6229 "uid_pool": cluster.uid_pool,
6230 "default_iallocator": cluster.default_iallocator,
6231 "reserved_lvs": cluster.reserved_lvs,
6232 "primary_ip_version": primary_ip_version,
6233 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6234 "hidden_os": cluster.hidden_os,
6235 "blacklisted_os": cluster.blacklisted_os,
6241 class LUClusterConfigQuery(NoHooksLU):
6242 """Return configuration values.
6247 def CheckArguments(self):
6248 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6250 def ExpandNames(self):
6251 self.cq.ExpandNames(self)
6253 def DeclareLocks(self, level):
6254 self.cq.DeclareLocks(self, level)
6256 def Exec(self, feedback_fn):
6257 result = self.cq.OldStyleQuery(self)
6259 assert len(result) == 1
6264 class _ClusterQuery(_QueryBase):
6265 FIELDS = query.CLUSTER_FIELDS
6267 #: Do not sort (there is only one item)
6270 def ExpandNames(self, lu):
6271 lu.needed_locks = {}
6273 # The following variables interact with _QueryBase._GetNames
6274 self.wanted = locking.ALL_SET
6275 self.do_locking = self.use_locking
6278 raise errors.OpPrereqError("Can not use locking for cluster queries",
6281 def DeclareLocks(self, lu, level):
6284 def _GetQueryData(self, lu):
6285 """Computes the list of nodes and their attributes.
6288 # Locking is not used
6289 assert not (compat.any(lu.glm.is_owned(level)
6290 for level in locking.LEVELS
6291 if level != locking.LEVEL_CLUSTER) or
6292 self.do_locking or self.use_locking)
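# Gather each data group only if it was requested; NotImplemented marks
# data the caller did not ask for.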
6294 if query.CQ_CONFIG in self.requested_data:
6295 cluster = lu.cfg.GetClusterInfo()
6297 cluster = NotImplemented
6299 if query.CQ_QUEUE_DRAINED in self.requested_data:
6300 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6302 drain_flag = NotImplemented
6304 if query.CQ_WATCHER_PAUSE in self.requested_data:
6305 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6307 watcher_pause = NotImplemented
6309 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6312 class LUInstanceActivateDisks(NoHooksLU):
6313 """Bring up an instance's disks.
6318 def ExpandNames(self):
6319 self._ExpandAndLockInstance()
6320 self.needed_locks[locking.LEVEL_NODE] = []
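# Node locks are not known yet; they are computed in DeclareLocks via
# _LockInstancesNodes once the instance lock is held.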
6321 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6323 def DeclareLocks(self, level):
6324 if level == locking.LEVEL_NODE:
6325 self._LockInstancesNodes()
6327 def CheckPrereq(self):
6328 """Check prerequisites.
6330 This checks that the instance is in the cluster.
6333 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6334 assert self.instance is not None, \
6335 "Cannot retrieve locked instance %s" % self.op.instance_name
6336 _CheckNodeOnline(self, self.instance.primary_node)
6338 def Exec(self, feedback_fn):
6339 """Activate the disks.
6342 disks_ok, disks_info = \
6343 _AssembleInstanceDisks(self, self.instance,
6344 ignore_size=self.op.ignore_size)
6346 raise errors.OpExecError("Cannot activate block devices")
6348 if self.op.wait_for_sync:
6349 if not _WaitForSync(self, self.instance):
6350 raise errors.OpExecError("Some disks of the instance are degraded!")
6355 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6357 """Prepare the block devices for an instance.
6359 This sets up the block devices on all nodes.
6361 @type lu: L{LogicalUnit}
6362 @param lu: the logical unit on whose behalf we execute
6363 @type instance: L{objects.Instance}
6364 @param instance: the instance for whose disks we assemble
6365 @type disks: list of L{objects.Disk} or None
6366 @param disks: which disks to assemble (or all, if None)
6367 @type ignore_secondaries: boolean
6368 @param ignore_secondaries: if true, errors on secondary nodes
6369 won't result in an error return from the function
6370 @type ignore_size: boolean
6371 @param ignore_size: if true, the current known size of the disk
6372 will not be used during the disk activation, useful for cases
6373 when the size is wrong
6374 @return: False if the operation failed, otherwise a list of
6375 (host, instance_visible_name, node_visible_name)
6376 with the mapping from node devices to instance devices
6381 iname = instance.name
6382 disks = _ExpandCheckDisks(instance, disks)
6384 # With the two-pass mechanism we try to reduce the window of
6385 # opportunity for the race condition of switching DRBD to primary
6386 # before the handshake has occurred, but we do not eliminate it
6388 # The proper fix would be to wait (with some limits) until the
6389 # connection has been made and drbd transitions from WFConnection
6390 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6393 # 1st pass, assemble on all nodes in secondary mode
6394 for idx, inst_disk in enumerate(disks):
6395 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6397 node_disk = node_disk.Copy()
6398 node_disk.UnsetSize()
6399 lu.cfg.SetDiskID(node_disk, node)
6400 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6402 msg = result.fail_msg
6404 is_offline_secondary = (node in instance.secondary_nodes and
6406 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6407 " (is_primary=False, pass=1): %s",
6408 inst_disk.iv_name, node, msg)
6409 if not (ignore_secondaries or is_offline_secondary):
6412 # FIXME: race condition on drbd migration to primary
6414 # 2nd pass, do only the primary node
6415 for idx, inst_disk in enumerate(disks):
6418 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6419 if node != instance.primary_node:
6422 node_disk = node_disk.Copy()
6423 node_disk.UnsetSize()
6424 lu.cfg.SetDiskID(node_disk, node)
6425 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6427 msg = result.fail_msg
6429 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6430 " (is_primary=True, pass=2): %s",
6431 inst_disk.iv_name, node, msg)
6434 dev_path = result.payload
6436 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6438 # leave the disks configured for the primary node
6439 # this is a workaround that would be better fixed by
6440 # improving the logical/physical id handling
6442 lu.cfg.SetDiskID(disk, instance.primary_node)
6444 return disks_ok, device_info
6447 def _StartInstanceDisks(lu, instance, force):
6448 """Start the disks of an instance.
6451 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6452 ignore_secondaries=force)
6454 _ShutdownInstanceDisks(lu, instance)
6455 if force is not None and not force:
6456 lu.proc.LogWarning("", hint="If the message above refers to a"
6458 " you can retry the operation using '--force'.")
6459 raise errors.OpExecError("Disk consistency error")
6462 class LUInstanceDeactivateDisks(NoHooksLU):
6463 """Shutdown an instance's disks.
6468 def ExpandNames(self):
6469 self._ExpandAndLockInstance()
6470 self.needed_locks[locking.LEVEL_NODE] = []
6471 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6473 def DeclareLocks(self, level):
6474 if level == locking.LEVEL_NODE:
6475 self._LockInstancesNodes()
6477 def CheckPrereq(self):
6478 """Check prerequisites.
6480 This checks that the instance is in the cluster.
6483 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6484 assert self.instance is not None, \
6485 "Cannot retrieve locked instance %s" % self.op.instance_name
6487 def Exec(self, feedback_fn):
6488 """Deactivate the disks
6491 instance = self.instance
6493 _ShutdownInstanceDisks(self, instance)
6495 _SafeShutdownInstanceDisks(self, instance)
6498 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6499 """Shutdown block devices of an instance.
6501 This function checks if an instance is running, before calling
6502 _ShutdownInstanceDisks.
6505 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6506 _ShutdownInstanceDisks(lu, instance, disks=disks)
6509 def _ExpandCheckDisks(instance, disks):
6510 """Return the instance disks selected by the disks list
6512 @type disks: list of L{objects.Disk} or None
6513 @param disks: selected disks
6514 @rtype: list of L{objects.Disk}
6515 @return: selected instance disks to act on
6518 if disks is None:
6519 return instance.disks
6520 else:
6521 if not set(disks).issubset(instance.disks):
6522 raise errors.ProgrammerError("Can only act on disks belonging to the"
6523 " target instance")
6524 return disks
6527 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6528 """Shutdown block devices of an instance.
6530 This does the shutdown on all nodes of the instance.
6532 If ignore_primary is false, errors on the primary node are not ignored and make the shutdown fail.
6537 disks = _ExpandCheckDisks(instance, disks)
6540 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6541 lu.cfg.SetDiskID(top_disk, node)
6542 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6543 msg = result.fail_msg
6545 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6546 disk.iv_name, node, msg)
6547 if ((node == instance.primary_node and not ignore_primary) or
6548 (node != instance.primary_node and not result.offline)):
6553 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6554 """Checks if a node has enough free memory.
6556 This function checks if a given node has the needed amount of free
6557 memory. In case the node has less memory or we cannot get the
6558 information from the node, this function raises an OpPrereqError exception.
6561 @type lu: C{LogicalUnit}
6562 @param lu: a logical unit from which we get configuration data
6564 @param node: the node to check
6565 @type reason: C{str}
6566 @param reason: string to use in the error message
6567 @type requested: C{int}
6568 @param requested: the amount of memory in MiB to check for
6569 @type hypervisor_name: C{str}
6570 @param hypervisor_name: the hypervisor to ask for memory stats
6572 @return: node current free memory
6573 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6574 we cannot check the node
6577 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6578 nodeinfo[node].Raise("Can't get data from node %s" % node,
6579 prereq=True, ecode=errors.ECODE_ENVIRON)
6580 (_, _, (hv_info, )) = nodeinfo[node].payload
6582 free_mem = hv_info.get("memory_free", None)
6583 if not isinstance(free_mem, int):
6584 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6585 " was '%s'" % (node, free_mem),
6586 errors.ECODE_ENVIRON)
6587 if requested > free_mem:
6588 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6589 " needed %s MiB, available %s MiB" %
6590 (node, reason, requested, free_mem),
6591 errors.ECODE_NORES)
6592 return free_mem
6595 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6596 """Checks if nodes have enough free disk space in the all VGs.
6598 This function checks if all given nodes have the needed amount of
6599 free disk. In case any node has less disk or we cannot get the
6600 information from the node, this function raises an OpPrereqError exception.
6603 @type lu: C{LogicalUnit}
6604 @param lu: a logical unit from which we get configuration data
6605 @type nodenames: C{list}
6606 @param nodenames: the list of node names to check
6607 @type req_sizes: C{dict}
6608 @param req_sizes: the hash of vg and corresponding amount of disk in MiB
6610 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6611 or we cannot check the node
6614 for vg, req_size in req_sizes.items():
6615 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
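# Example (hypothetical): req_sizes = {"xenvg": 10240} requires 10 GiB of
# free space in volume group "xenvg" on every node in nodenames.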
6618 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6619 """Checks if nodes have enough free disk space in the specified VG.
6621 This function checks if all given nodes have the needed amount of
6622 free disk. In case any node has less disk or we cannot get the
6623 information from the node, this function raises an OpPrereqError exception.
6626 @type lu: C{LogicalUnit}
6627 @param lu: a logical unit from which we get configuration data
6628 @type nodenames: C{list}
6629 @param nodenames: the list of node names to check
6631 @param vg: the volume group to check
6632 @type requested: C{int}
6633 @param requested: the amount of disk in MiB to check for
6634 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6635 or we cannot check the node
6638 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6639 for node in nodenames:
6640 info = nodeinfo[node]
6641 info.Raise("Cannot get current information from node %s" % node,
6642 prereq=True, ecode=errors.ECODE_ENVIRON)
6643 (_, (vg_info, ), _) = info.payload
6644 vg_free = vg_info.get("vg_free", None)
6645 if not isinstance(vg_free, int):
6646 raise errors.OpPrereqError("Can't compute free disk space on node"
6647 " %s for vg %s, result was '%s'" %
6648 (node, vg, vg_free), errors.ECODE_ENVIRON)
6649 if requested > vg_free:
6650 raise errors.OpPrereqError("Not enough disk space on target node %s"
6651 " vg %s: required %d MiB, available %d MiB" %
6652 (node, vg, requested, vg_free),
6656 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6657 """Checks if nodes have enough physical CPUs
6659 This function checks if all given nodes have the needed number of
6660 physical CPUs. In case any node has fewer CPUs or we cannot get the
6661 information from the node, this function raises an OpPrereqError exception.
6664 @type lu: C{LogicalUnit}
6665 @param lu: a logical unit from which we get configuration data
6666 @type nodenames: C{list}
6667 @param nodenames: the list of node names to check
6668 @type requested: C{int}
6669 @param requested: the minimum acceptable number of physical CPUs
6670 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6671 or we cannot check the node
6674 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6675 for node in nodenames:
6676 info = nodeinfo[node]
6677 info.Raise("Cannot get current information from node %s" % node,
6678 prereq=True, ecode=errors.ECODE_ENVIRON)
6679 (_, _, (hv_info, )) = info.payload
6680 num_cpus = hv_info.get("cpu_total", None)
6681 if not isinstance(num_cpus, int):
6682 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6683 " on node %s, result was '%s'" %
6684 (node, num_cpus), errors.ECODE_ENVIRON)
6685 if requested > num_cpus:
6686 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6687 "required" % (node, num_cpus, requested),
6691 class LUInstanceStartup(LogicalUnit):
6692 """Starts an instance.
6695 HPATH = "instance-start"
6696 HTYPE = constants.HTYPE_INSTANCE
6699 def CheckArguments(self):
6701 if self.op.beparams:
6702 # fill the beparams dict
6703 objects.UpgradeBeParams(self.op.beparams)
6704 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6706 def ExpandNames(self):
6707 self._ExpandAndLockInstance()
6708 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6710 def DeclareLocks(self, level):
6711 if level == locking.LEVEL_NODE_RES:
6712 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6714 def BuildHooksEnv(self):
6717 This runs on master, primary and secondary nodes of the instance.
6721 "FORCE": self.op.force,
6724 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6728 def BuildHooksNodes(self):
6729 """Build hooks nodes.
6732 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6735 def CheckPrereq(self):
6736 """Check prerequisites.
6738 This checks that the instance is in the cluster.
6741 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6742 assert self.instance is not None, \
6743 "Cannot retrieve locked instance %s" % self.op.instance_name
6746 if self.op.hvparams:
6747 # check hypervisor parameter syntax (locally)
6748 cluster = self.cfg.GetClusterInfo()
6749 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6750 filled_hvp = cluster.FillHV(instance)
6751 filled_hvp.update(self.op.hvparams)
6752 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6753 hv_type.CheckParameterSyntax(filled_hvp)
6754 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6756 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6758 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6760 if self.primary_offline and self.op.ignore_offline_nodes:
6761 self.proc.LogWarning("Ignoring offline primary node")
6763 if self.op.hvparams or self.op.beparams:
6764 self.proc.LogWarning("Overridden parameters are ignored")
6766 _CheckNodeOnline(self, instance.primary_node)
6768 bep = self.cfg.GetClusterInfo().FillBE(instance)
6769 bep.update(self.op.beparams)
6771 # check bridges existence
6772 _CheckInstanceBridgesExist(self, instance)
6774 remote_info = self.rpc.call_instance_info(instance.primary_node,
6776 instance.hypervisor)
6777 remote_info.Raise("Error checking node %s" % instance.primary_node,
6778 prereq=True, ecode=errors.ECODE_ENVIRON)
6779 if not remote_info.payload: # not running already
6780 _CheckNodeFreeMemory(self, instance.primary_node,
6781 "starting instance %s" % instance.name,
6782 bep[constants.BE_MINMEM], instance.hypervisor)
6784 def Exec(self, feedback_fn):
6785 """Start the instance.
6788 instance = self.instance
6789 force = self.op.force
6791 if not self.op.no_remember:
6792 self.cfg.MarkInstanceUp(instance.name)
6794 if self.primary_offline:
6795 assert self.op.ignore_offline_nodes
6796 self.proc.LogInfo("Primary node offline, marked instance as started")
6798 node_current = instance.primary_node
6800 _StartInstanceDisks(self, instance, force)
6802 result = \
6803 self.rpc.call_instance_start(node_current,
6804 (instance, self.op.hvparams,
6805 self.op.beparams),
6806 self.op.startup_paused)
6807 msg = result.fail_msg
6809 _ShutdownInstanceDisks(self, instance)
6810 raise errors.OpExecError("Could not start instance: %s" % msg)
6813 class LUInstanceReboot(LogicalUnit):
6814 """Reboot an instance.
6817 HPATH = "instance-reboot"
6818 HTYPE = constants.HTYPE_INSTANCE
6821 def ExpandNames(self):
6822 self._ExpandAndLockInstance()
6824 def BuildHooksEnv(self):
6827 This runs on master, primary and secondary nodes of the instance.
6831 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6832 "REBOOT_TYPE": self.op.reboot_type,
6833 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6836 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6840 def BuildHooksNodes(self):
6841 """Build hooks nodes.
6844 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6847 def CheckPrereq(self):
6848 """Check prerequisites.
6850 This checks that the instance is in the cluster.
6853 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6854 assert self.instance is not None, \
6855 "Cannot retrieve locked instance %s" % self.op.instance_name
6856 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6857 _CheckNodeOnline(self, instance.primary_node)
6859 # check bridges existence
6860 _CheckInstanceBridgesExist(self, instance)
6862 def Exec(self, feedback_fn):
6863 """Reboot the instance.
6866 instance = self.instance
6867 ignore_secondaries = self.op.ignore_secondaries
6868 reboot_type = self.op.reboot_type
6870 remote_info = self.rpc.call_instance_info(instance.primary_node,
6872 instance.hypervisor)
6873 remote_info.Raise("Error checking node %s" % instance.primary_node)
6874 instance_running = bool(remote_info.payload)
6876 node_current = instance.primary_node
6878 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6879 constants.INSTANCE_REBOOT_HARD]:
6880 for disk in instance.disks:
6881 self.cfg.SetDiskID(disk, node_current)
6882 result = self.rpc.call_instance_reboot(node_current, instance,
6884 self.op.shutdown_timeout)
6885 result.Raise("Could not reboot instance")
6887 if instance_running:
6888 result = self.rpc.call_instance_shutdown(node_current, instance,
6889 self.op.shutdown_timeout)
6890 result.Raise("Could not shutdown instance for full reboot")
6891 _ShutdownInstanceDisks(self, instance)
6893 self.LogInfo("Instance %s was already stopped, starting now",
6895 _StartInstanceDisks(self, instance, ignore_secondaries)
6896 result = self.rpc.call_instance_start(node_current,
6897 (instance, None, None), False)
6898 msg = result.fail_msg
6900 _ShutdownInstanceDisks(self, instance)
6901 raise errors.OpExecError("Could not start instance for"
6902 " full reboot: %s" % msg)
6904 self.cfg.MarkInstanceUp(instance.name)
6907 class LUInstanceShutdown(LogicalUnit):
6908 """Shutdown an instance.
6911 HPATH = "instance-stop"
6912 HTYPE = constants.HTYPE_INSTANCE
6915 def ExpandNames(self):
6916 self._ExpandAndLockInstance()
6918 def BuildHooksEnv(self):
6921 This runs on master, primary and secondary nodes of the instance.
6924 env = _BuildInstanceHookEnvByObject(self, self.instance)
6925 env["TIMEOUT"] = self.op.timeout
6928 def BuildHooksNodes(self):
6929 """Build hooks nodes.
6932 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6935 def CheckPrereq(self):
6936 """Check prerequisites.
6938 This checks that the instance is in the cluster.
6941 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6942 assert self.instance is not None, \
6943 "Cannot retrieve locked instance %s" % self.op.instance_name
6945 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6947 self.primary_offline = \
6948 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6950 if self.primary_offline and self.op.ignore_offline_nodes:
6951 self.proc.LogWarning("Ignoring offline primary node")
6953 _CheckNodeOnline(self, self.instance.primary_node)
6955 def Exec(self, feedback_fn):
6956 """Shutdown the instance.
6959 instance = self.instance
6960 node_current = instance.primary_node
6961 timeout = self.op.timeout
6963 if not self.op.no_remember:
6964 self.cfg.MarkInstanceDown(instance.name)
6966 if self.primary_offline:
6967 assert self.op.ignore_offline_nodes
6968 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6970 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6971 msg = result.fail_msg
6973 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6975 _ShutdownInstanceDisks(self, instance)
6978 class LUInstanceReinstall(LogicalUnit):
6979 """Reinstall an instance.
6982 HPATH = "instance-reinstall"
6983 HTYPE = constants.HTYPE_INSTANCE
6986 def ExpandNames(self):
6987 self._ExpandAndLockInstance()
6989 def BuildHooksEnv(self):
6992 This runs on master, primary and secondary nodes of the instance.
6995 return _BuildInstanceHookEnvByObject(self, self.instance)
6997 def BuildHooksNodes(self):
6998 """Build hooks nodes.
7001 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7004 def CheckPrereq(self):
7005 """Check prerequisites.
7007 This checks that the instance is in the cluster and is not running.
7010 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7011 assert instance is not None, \
7012 "Cannot retrieve locked instance %s" % self.op.instance_name
7013 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7014 " offline, cannot reinstall")
7016 if instance.disk_template == constants.DT_DISKLESS:
7017 raise errors.OpPrereqError("Instance '%s' has no disks" %
7018 self.op.instance_name,
7020 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7022 if self.op.os_type is not None:
7024 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7025 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7026 instance_os = self.op.os_type
7028 instance_os = instance.os
7030 nodelist = list(instance.all_nodes)
7032 if self.op.osparams:
7033 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7034 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7035 self.os_inst = i_osdict # the new dict (without defaults)
7039 self.instance = instance
7041 def Exec(self, feedback_fn):
7042 """Reinstall the instance.
7045 inst = self.instance
7047 if self.op.os_type is not None:
7048 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7049 inst.os = self.op.os_type
7050 # Write to configuration
7051 self.cfg.Update(inst, feedback_fn)
7053 _StartInstanceDisks(self, inst, None)
7055 feedback_fn("Running the instance OS create scripts...")
7056 # FIXME: pass debug option from opcode to backend
7057 result = self.rpc.call_instance_os_add(inst.primary_node,
7058 (inst, self.os_inst), True,
7059 self.op.debug_level)
7060 result.Raise("Could not install OS for instance %s on node %s" %
7061 (inst.name, inst.primary_node))
7063 _ShutdownInstanceDisks(self, inst)
7066 class LUInstanceRecreateDisks(LogicalUnit):
7067 """Recreate an instance's missing disks.
7070 HPATH = "instance-recreate-disks"
7071 HTYPE = constants.HTYPE_INSTANCE
7074 _MODIFYABLE = frozenset([
7075 constants.IDISK_SIZE,
7076 constants.IDISK_MODE,
7079 # New or changed disk parameters may have different semantics
7080 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7081 constants.IDISK_ADOPT,
7083 # TODO: Implement support changing VG while recreating
7085 constants.IDISK_METAVG,
7088 def _RunAllocator(self):
7089 """Run the allocator based on input opcode.
7092 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7095 # The allocator should actually run in "relocate" mode, but current
7096 # allocators don't support relocating all the nodes of an instance at
7097 # the same time. As a workaround we use "allocate" mode, but this is
7098 # suboptimal for two reasons:
7099 # - The instance name passed to the allocator is present in the list of
7100 # existing instances, so there could be a conflict within the
7101 # internal structures of the allocator. This doesn't happen with the
7102 # current allocators, but it's a liability.
7103 # - The allocator counts the resources used by the instance twice: once
7104 # because the instance exists already, and once because it tries to
7105 # allocate a new instance.
7106 # The allocator could choose some of the nodes on which the instance is
7107 # running, but that's not a problem. If the instance nodes are broken,
7108 # they should already be marked as drained or offline, and hence
7109 # skipped by the allocator. If instance disks have been lost for other
7110 # reasons, then recreating the disks on the same nodes should be fine.
7111 ial = IAllocator(self.cfg, self.rpc,
7112 mode=constants.IALLOCATOR_MODE_ALLOC,
7113 name=self.op.instance_name,
7114 disk_template=self.instance.disk_template,
7115 tags=list(self.instance.GetTags()),
7116 os=self.instance.os,
7118 vcpus=be_full[constants.BE_VCPUS],
7119 memory=be_full[constants.BE_MAXMEM],
7120 spindle_use=be_full[constants.BE_SPINDLE_USE],
7121 disks=[{constants.IDISK_SIZE: d.size,
7122 constants.IDISK_MODE: d.mode}
7123 for d in self.instance.disks],
7124 hypervisor=self.instance.hypervisor)
7126 assert ial.required_nodes == len(self.instance.all_nodes)
7128 ial.Run(self.op.iallocator)
7131 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7132 " %s" % (self.op.iallocator, ial.info),
7135 if len(ial.result) != ial.required_nodes:
7136 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7137 " of nodes (%s), required %s" %
7138 (self.op.iallocator, len(ial.result),
7139 ial.required_nodes), errors.ECODE_FAULT)
7141 self.op.nodes = ial.result
7142 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7143 self.op.instance_name, self.op.iallocator,
7144 utils.CommaJoin(ial.result))
7146 def CheckArguments(self):
7147 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7148 # Normalize and convert deprecated list of disk indices
7149 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
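# e.g. the deprecated form [2, 0] becomes [(0, {}), (2, {})]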
7151 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7153 raise errors.OpPrereqError("Some disks have been specified more than"
7154 " once: %s" % utils.CommaJoin(duplicates),
7157 for (idx, params) in self.op.disks:
7158 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7159 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7161 raise errors.OpPrereqError("Parameters for disk %s try to change"
7162 " unmodifyable parameter(s): %s" %
7163 (idx, utils.CommaJoin(unsupported)),
7166 def ExpandNames(self):
7167 self._ExpandAndLockInstance()
7168 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7170 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7171 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7173 self.needed_locks[locking.LEVEL_NODE] = []
7174 self.needed_locks[locking.LEVEL_NODE_RES] = []
7176 def DeclareLocks(self, level):
7177 if level == locking.LEVEL_NODE:
7178 # if we replace the nodes, we only need to lock the old primary,
7179 # otherwise we need to lock all nodes for disk re-creation
7180 primary_only = bool(self.op.nodes)
7181 self._LockInstancesNodes(primary_only=primary_only)
7182 elif level == locking.LEVEL_NODE_RES:
7184 self.needed_locks[locking.LEVEL_NODE_RES] = \
7185 self.needed_locks[locking.LEVEL_NODE][:]
7187 def BuildHooksEnv(self):
7190 This runs on master, primary and secondary nodes of the instance.
7193 return _BuildInstanceHookEnvByObject(self, self.instance)
7195 def BuildHooksNodes(self):
7196 """Build hooks nodes.
7199 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7202 def CheckPrereq(self):
7203 """Check prerequisites.
7205 This checks that the instance is in the cluster and is not running.
7208 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7209 assert instance is not None, \
7210 "Cannot retrieve locked instance %s" % self.op.instance_name
7212 if len(self.op.nodes) != len(instance.all_nodes):
7213 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7214 " %d replacement nodes were specified" %
7215 (instance.name, len(instance.all_nodes),
7216 len(self.op.nodes)),
7218 assert instance.disk_template != constants.DT_DRBD8 or \
7219 len(self.op.nodes) == 2
7220 assert instance.disk_template != constants.DT_PLAIN or \
7221 len(self.op.nodes) == 1
7222 primary_node = self.op.nodes[0]
7224 primary_node = instance.primary_node
7225 _CheckNodeOnline(self, primary_node)
7227 if instance.disk_template == constants.DT_DISKLESS:
7228 raise errors.OpPrereqError("Instance '%s' has no disks" %
7229 self.op.instance_name, errors.ECODE_INVAL)
7231 # if we replace nodes *and* the old primary is offline, we don't check the instance state
7233 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7234 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7235 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7236 if not (self.op.nodes and old_pnode.offline):
7237 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7238 msg="cannot recreate disks")
7241 self.disks = dict(self.op.disks)
7243 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
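# (no disks were specified, so every disk of the instance is recreated
# with unchanged parameters)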
7245 maxidx = max(self.disks.keys())
7246 if maxidx >= len(instance.disks):
7247 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7250 if (self.op.nodes and
7251 sorted(self.disks.keys()) != range(len(instance.disks))):
7252 raise errors.OpPrereqError("Can't recreate disks partially and"
7253 " change the nodes at the same time",
7256 self.instance = instance
7258 def Exec(self, feedback_fn):
7259 """Recreate the disks.
7262 instance = self.instance
7264 assert (self.owned_locks(locking.LEVEL_NODE) ==
7265 self.owned_locks(locking.LEVEL_NODE_RES))
7268 mods = [] # keeps track of needed changes
7270 for idx, disk in enumerate(instance.disks):
7272 changes = self.disks[idx]
7274 # Disk should not be recreated
7278 # update secondaries for disks, if needed
7279 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7280 # need to update the nodes and minors
7281 assert len(self.op.nodes) == 2
7282 assert len(disk.logical_id) == 6 # otherwise disk internals are broken
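# For DRBD8 the logical_id is the 6-tuple (node_a, node_b, port, minor_a,
# minor_b, secret); port and secret are kept, while the minors are
# re-allocated below for the new node pair.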
7284 (_, _, old_port, _, _, old_secret) = disk.logical_id
7285 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7286 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7287 new_minors[0], new_minors[1], old_secret)
7288 assert len(disk.logical_id) == len(new_id)
7292 mods.append((idx, new_id, changes))
7294 # now that we have passed all asserts above, we can apply the mods
7295 # in a single run (to avoid partial changes)
7296 for idx, new_id, changes in mods:
7297 disk = instance.disks[idx]
7298 if new_id is not None:
7299 assert disk.dev_type == constants.LD_DRBD8
7300 disk.logical_id = new_id
7302 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7303 mode=changes.get(constants.IDISK_MODE, None))
7305 # change primary node, if needed
7307 instance.primary_node = self.op.nodes[0]
7308 self.LogWarning("Changing the instance's nodes, you will have to"
7309 " remove any disks left on the older nodes manually")
7312 self.cfg.Update(instance, feedback_fn)
7314 _CreateDisks(self, instance, to_skip=to_skip)
7317 class LUInstanceRename(LogicalUnit):
7318 """Rename an instance.
7321 HPATH = "instance-rename"
7322 HTYPE = constants.HTYPE_INSTANCE
7324 def CheckArguments(self):
7328 if self.op.ip_check and not self.op.name_check:
7329 # TODO: make the ip check more flexible and not depend on the name check
7330 raise errors.OpPrereqError("IP address check requires a name check",
7333 def BuildHooksEnv(self):
7336 This runs on master, primary and secondary nodes of the instance.
7339 env = _BuildInstanceHookEnvByObject(self, self.instance)
7340 env["INSTANCE_NEW_NAME"] = self.op.new_name
7343 def BuildHooksNodes(self):
7344 """Build hooks nodes.
7347 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7350 def CheckPrereq(self):
7351 """Check prerequisites.
7353 This checks that the instance is in the cluster and is not running.
7356 self.op.instance_name = _ExpandInstanceName(self.cfg,
7357 self.op.instance_name)
7358 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7359 assert instance is not None
7360 _CheckNodeOnline(self, instance.primary_node)
7361 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7362 msg="cannot rename")
7363 self.instance = instance
7365 new_name = self.op.new_name
7366 if self.op.name_check:
7367 hostname = netutils.GetHostname(name=new_name)
7368 if hostname.name != new_name:
7369 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7371 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7372 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7373 " same as given hostname '%s'") %
7374 (hostname.name, self.op.new_name),
7376 new_name = self.op.new_name = hostname.name
7377 if (self.op.ip_check and
7378 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7379 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7380 (hostname.ip, new_name),
7381 errors.ECODE_NOTUNIQUE)
7383 instance_list = self.cfg.GetInstanceList()
7384 if new_name in instance_list and new_name != instance.name:
7385 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7386 new_name, errors.ECODE_EXISTS)
7388 def Exec(self, feedback_fn):
7389 """Rename the instance.
7392 inst = self.instance
7393 old_name = inst.name
7395 rename_file_storage = False
7396 if (inst.disk_template in constants.DTS_FILEBASED and
7397 self.op.new_name != inst.name):
7398 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7399 rename_file_storage = True
7401 self.cfg.RenameInstance(inst.name, self.op.new_name)
7402 # Change the instance lock. This is definitely safe while we hold the BGL.
7403 # Otherwise the new lock would have to be added in acquired mode.
7405 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7406 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7408 # re-read the instance from the configuration after rename
7409 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7411 if rename_file_storage:
7412 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7413 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7414 old_file_storage_dir,
7415 new_file_storage_dir)
7416 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7417 " (but the instance has been renamed in Ganeti)" %
7418 (inst.primary_node, old_file_storage_dir,
7419 new_file_storage_dir))
7421 _StartInstanceDisks(self, inst, None)
7423 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7424 old_name, self.op.debug_level)
7425 msg = result.fail_msg
7427 msg = ("Could not run OS rename script for instance %s on node %s"
7428 " (but the instance has been renamed in Ganeti): %s" %
7429 (inst.name, inst.primary_node, msg))
7430 self.proc.LogWarning(msg)
7432 _ShutdownInstanceDisks(self, inst)
7437 class LUInstanceRemove(LogicalUnit):
7438 """Remove an instance.
7441 HPATH = "instance-remove"
7442 HTYPE = constants.HTYPE_INSTANCE
7445 def ExpandNames(self):
7446 self._ExpandAndLockInstance()
7447 self.needed_locks[locking.LEVEL_NODE] = []
7448 self.needed_locks[locking.LEVEL_NODE_RES] = []
7449 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7451 def DeclareLocks(self, level):
7452 if level == locking.LEVEL_NODE:
7453 self._LockInstancesNodes()
7454 elif level == locking.LEVEL_NODE_RES:
7456 self.needed_locks[locking.LEVEL_NODE_RES] = \
7457 self.needed_locks[locking.LEVEL_NODE][:]
7459 def BuildHooksEnv(self):
7462 This runs on master, primary and secondary nodes of the instance.
7465 env = _BuildInstanceHookEnvByObject(self, self.instance)
7466 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7469 def BuildHooksNodes(self):
7470 """Build hooks nodes.
7473 nl = [self.cfg.GetMasterNode()]
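# the post-phase hook also runs on the nodes that hosted the instance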
7474 nl_post = list(self.instance.all_nodes) + nl
7475 return (nl, nl_post)
7477 def CheckPrereq(self):
7478 """Check prerequisites.
7480 This checks that the instance is in the cluster.
7483 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7484 assert self.instance is not None, \
7485 "Cannot retrieve locked instance %s" % self.op.instance_name
7487 def Exec(self, feedback_fn):
7488 """Remove the instance.
7491 instance = self.instance
7492 logging.info("Shutting down instance %s on node %s",
7493 instance.name, instance.primary_node)
7495 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7496 self.op.shutdown_timeout)
7497 msg = result.fail_msg
7499 if self.op.ignore_failures:
7500 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7502 raise errors.OpExecError("Could not shutdown instance %s on"
7504 (instance.name, instance.primary_node, msg))
7506 assert (self.owned_locks(locking.LEVEL_NODE) ==
7507 self.owned_locks(locking.LEVEL_NODE_RES))
7508 assert not (set(instance.all_nodes) -
7509 self.owned_locks(locking.LEVEL_NODE)), \
7510 "Not owning correct locks"
7512 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7515 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7516 """Utility function to remove an instance.
7519 logging.info("Removing block devices for instance %s", instance.name)
7521 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7522 if not ignore_failures:
7523 raise errors.OpExecError("Can't remove instance's disks")
7524 feedback_fn("Warning: can't remove instance's disks")
7526 logging.info("Removing instance %s out of cluster config", instance.name)
7528 lu.cfg.RemoveInstance(instance.name)
7530 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7531 "Instance lock removal conflict"
7533 # Remove lock for the instance
7534 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7537 class LUInstanceQuery(NoHooksLU):
7538 """Logical unit for querying instances.
7541 # pylint: disable=W0142
7544 def CheckArguments(self):
7545 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7546 self.op.output_fields, self.op.use_locking)
7548 def ExpandNames(self):
7549 self.iq.ExpandNames(self)
7551 def DeclareLocks(self, level):
7552 self.iq.DeclareLocks(self, level)
7554 def Exec(self, feedback_fn):
7555 return self.iq.OldStyleQuery(self)
7558 class LUInstanceFailover(LogicalUnit):
7559 """Failover an instance.
7562 HPATH = "instance-failover"
7563 HTYPE = constants.HTYPE_INSTANCE
7566 def CheckArguments(self):
7567 """Check the arguments.
7570 self.iallocator = getattr(self.op, "iallocator", None)
7571 self.target_node = getattr(self.op, "target_node", None)
7573 def ExpandNames(self):
7574 self._ExpandAndLockInstance()
7576 if self.op.target_node is not None:
7577 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7579 self.needed_locks[locking.LEVEL_NODE] = []
7580 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7582 self.needed_locks[locking.LEVEL_NODE_RES] = []
7583 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7585 ignore_consistency = self.op.ignore_consistency
7586 shutdown_timeout = self.op.shutdown_timeout
7587 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7590 ignore_consistency=ignore_consistency,
7591 shutdown_timeout=shutdown_timeout,
7592 ignore_ipolicy=self.op.ignore_ipolicy)
7593 self.tasklets = [self._migrater]
7595 def DeclareLocks(self, level):
7596 if level == locking.LEVEL_NODE:
7597 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7598 if instance.disk_template in constants.DTS_EXT_MIRROR:
7599 if self.op.target_node is None:
7600 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7602 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7603 self.op.target_node]
7604 del self.recalculate_locks[locking.LEVEL_NODE]
7606 self._LockInstancesNodes()
7607 elif level == locking.LEVEL_NODE_RES:
7609 self.needed_locks[locking.LEVEL_NODE_RES] = \
7610 self.needed_locks[locking.LEVEL_NODE][:]
7612 def BuildHooksEnv(self):
7615 This runs on master, primary and secondary nodes of the instance.
7618 instance = self._migrater.instance
7619 source_node = instance.primary_node
7620 target_node = self.op.target_node
7622 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7623 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7624 "OLD_PRIMARY": source_node,
7625 "NEW_PRIMARY": target_node,
7628 if instance.disk_template in constants.DTS_INT_MIRROR:
7629 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7630 env["NEW_SECONDARY"] = source_node
7632 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7634 env.update(_BuildInstanceHookEnvByObject(self, instance))
7638 def BuildHooksNodes(self):
7639 """Build hooks nodes.
7642 instance = self._migrater.instance
7643 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7644 return (nl, nl + [instance.primary_node])
7647 class LUInstanceMigrate(LogicalUnit):
7648 """Migrate an instance.
7650 This is migration without shutting down, compared to the failover,
7651 which is done with shutdown.
7654 HPATH = "instance-migrate"
7655 HTYPE = constants.HTYPE_INSTANCE
7658 def ExpandNames(self):
7659 self._ExpandAndLockInstance()
7661 if self.op.target_node is not None:
7662 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7664 self.needed_locks[locking.LEVEL_NODE] = []
7665 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7667 self.needed_locks[locking.LEVEL_NODE_RES] = []
7668 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7671 TLMigrateInstance(self, self.op.instance_name,
7672 cleanup=self.op.cleanup,
7674 fallback=self.op.allow_failover,
7675 allow_runtime_changes=self.op.allow_runtime_changes,
7676 ignore_ipolicy=self.op.ignore_ipolicy)
7677 self.tasklets = [self._migrater]
7679 def DeclareLocks(self, level):
7680 if level == locking.LEVEL_NODE:
7681 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7682 if instance.disk_template in constants.DTS_EXT_MIRROR:
7683 if self.op.target_node is None:
7684 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7686 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7687 self.op.target_node]
7688 del self.recalculate_locks[locking.LEVEL_NODE]
7690 self._LockInstancesNodes()
7691 elif level == locking.LEVEL_NODE_RES:
7693 self.needed_locks[locking.LEVEL_NODE_RES] = \
7694 self.needed_locks[locking.LEVEL_NODE][:]
7696 def BuildHooksEnv(self):
7699 This runs on master, primary and secondary nodes of the instance.
7702 instance = self._migrater.instance
7703 source_node = instance.primary_node
7704 target_node = self.op.target_node
7705 env = _BuildInstanceHookEnvByObject(self, instance)
7707 "MIGRATE_LIVE": self._migrater.live,
7708 "MIGRATE_CLEANUP": self.op.cleanup,
7709 "OLD_PRIMARY": source_node,
7710 "NEW_PRIMARY": target_node,
7711 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7714 if instance.disk_template in constants.DTS_INT_MIRROR:
7715 env["OLD_SECONDARY"] = target_node
7716 env["NEW_SECONDARY"] = source_node
7718 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7722 def BuildHooksNodes(self):
7723 """Build hooks nodes.
7726 instance = self._migrater.instance
7727 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7728 return (nl, nl + [instance.primary_node])
7731 class LUInstanceMove(LogicalUnit):
7732 """Move an instance by data-copying.
7735 HPATH = "instance-move"
7736 HTYPE = constants.HTYPE_INSTANCE
7739 def ExpandNames(self):
7740 self._ExpandAndLockInstance()
7741 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7742 self.op.target_node = target_node
7743 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7744 self.needed_locks[locking.LEVEL_NODE_RES] = []
7745 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7747 def DeclareLocks(self, level):
7748 if level == locking.LEVEL_NODE:
7749 self._LockInstancesNodes(primary_only=True)
7750 elif level == locking.LEVEL_NODE_RES:
7752 self.needed_locks[locking.LEVEL_NODE_RES] = \
7753 self.needed_locks[locking.LEVEL_NODE][:]
7755 def BuildHooksEnv(self):
7758 This runs on master, primary and secondary nodes of the instance.
7762 "TARGET_NODE": self.op.target_node,
7763 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7765 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7768 def BuildHooksNodes(self):
7769 """Build hooks nodes.
7773 self.cfg.GetMasterNode(),
7774 self.instance.primary_node,
7775 self.op.target_node,
7779 def CheckPrereq(self):
7780 """Check prerequisites.
7782 This checks that the instance is in the cluster.
7785 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7786 assert self.instance is not None, \
7787 "Cannot retrieve locked instance %s" % self.op.instance_name
7789 node = self.cfg.GetNodeInfo(self.op.target_node)
7790 assert node is not None, \
7791 "Cannot retrieve locked node %s" % self.op.target_node
7793 self.target_node = target_node = node.name
7795 if target_node == instance.primary_node:
7796 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7797 (instance.name, target_node),
7800 bep = self.cfg.GetClusterInfo().FillBE(instance)
7802 for idx, dsk in enumerate(instance.disks):
7803 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7804 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7805 " cannot copy" % idx, errors.ECODE_STATE)
7807 _CheckNodeOnline(self, target_node)
7808 _CheckNodeNotDrained(self, target_node)
7809 _CheckNodeVmCapable(self, target_node)
7810 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7811 self.cfg.GetNodeGroup(node.group))
7812 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7813 ignore=self.op.ignore_ipolicy)
7815 if instance.admin_state == constants.ADMINST_UP:
7816 # check memory requirements on the secondary node
7817 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7818 instance.name, bep[constants.BE_MAXMEM],
7819 instance.hypervisor)
7821 self.LogInfo("Not checking memory on the secondary node as"
7822 " instance will not be started")
7824 # check bridge existence
7825 _CheckInstanceBridgesExist(self, instance, node=target_node)
7827 def Exec(self, feedback_fn):
7828 """Move an instance.
7830 The move is done by shutting it down on its present node, copying
7831 the data over (slow) and starting it on the new node.
7834 instance = self.instance
7836 source_node = instance.primary_node
7837 target_node = self.target_node
7839 self.LogInfo("Shutting down instance %s on source node %s",
7840 instance.name, source_node)
7842 assert (self.owned_locks(locking.LEVEL_NODE) ==
7843 self.owned_locks(locking.LEVEL_NODE_RES))
7845 result = self.rpc.call_instance_shutdown(source_node, instance,
7846 self.op.shutdown_timeout)
7847 msg = result.fail_msg
7849 if self.op.ignore_consistency:
7850 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7851 " Proceeding anyway. Please make sure node"
7852 " %s is down. Error details: %s",
7853 instance.name, source_node, source_node, msg)
7855 raise errors.OpExecError("Could not shutdown instance %s on"
7857 (instance.name, source_node, msg))
7859 # create the target disks
7861 _CreateDisks(self, instance, target_node=target_node)
7862 except errors.OpExecError:
7863 self.LogWarning("Device creation failed, reverting...")
7865 _RemoveDisks(self, instance, target_node=target_node)
7867 self.cfg.ReleaseDRBDMinors(instance.name)
7870 cluster_name = self.cfg.GetClusterInfo().cluster_name
7873 # activate, get path, copy the data over
7874 for idx, disk in enumerate(instance.disks):
7875 self.LogInfo("Copying data for disk %d", idx)
7876 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7877 instance.name, True, idx)
7879 self.LogWarning("Can't assemble newly created disk %d: %s",
7880 idx, result.fail_msg)
7881 errs.append(result.fail_msg)
7883 dev_path = result.payload
7884 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7885 target_node, dev_path,
7888 self.LogWarning("Can't copy data over for disk %d: %s",
7889 idx, result.fail_msg)
7890 errs.append(result.fail_msg)
7894 self.LogWarning("Some disks failed to copy, aborting")
7896 _RemoveDisks(self, instance, target_node=target_node)
7898 self.cfg.ReleaseDRBDMinors(instance.name)
7899 raise errors.OpExecError("Errors during disk copy: %s" %
7902 instance.primary_node = target_node
7903 self.cfg.Update(instance, feedback_fn)
7905 self.LogInfo("Removing the disks on the original node")
7906 _RemoveDisks(self, instance, target_node=source_node)
7908 # Only start the instance if it's marked as up
7909 if instance.admin_state == constants.ADMINST_UP:
7910 self.LogInfo("Starting instance %s on node %s",
7911 instance.name, target_node)
7913 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7914 ignore_secondaries=True)
7916 _ShutdownInstanceDisks(self, instance)
7917 raise errors.OpExecError("Can't activate the instance's disks")
7919 result = self.rpc.call_instance_start(target_node,
7920 (instance, None, None), False)
7921 msg = result.fail_msg
7923 _ShutdownInstanceDisks(self, instance)
7924 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7925 (instance.name, target_node, msg))
7928 class LUNodeMigrate(LogicalUnit):
7929 """Migrate all instances from a node.
7932 HPATH = "node-migrate"
7933 HTYPE = constants.HTYPE_NODE
7936 def CheckArguments(self):
7939 def ExpandNames(self):
7940 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7942 self.share_locks = _ShareAll()
7943 self.needed_locks = {
7944 locking.LEVEL_NODE: [self.op.node_name],
7947 def BuildHooksEnv(self):
7950 This runs on the master, the primary and all the secondaries.
7954 "NODE_NAME": self.op.node_name,
7955 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7958 def BuildHooksNodes(self):
7959 """Build hooks nodes.
7962 nl = [self.cfg.GetMasterNode()]
7965 def CheckPrereq(self):
7968 def Exec(self, feedback_fn):
7969 # Prepare jobs for migration instances
7970 allow_runtime_changes = self.op.allow_runtime_changes
7972 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7975 iallocator=self.op.iallocator,
7976 target_node=self.op.target_node,
7977 allow_runtime_changes=allow_runtime_changes,
7978 ignore_ipolicy=self.op.ignore_ipolicy)]
7979 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7982 # TODO: Run iallocator in this opcode and pass correct placement options to
7983 # OpInstanceMigrate. Since other jobs can modify the cluster between
7984 # running the iallocator and the actual migration, a good consistency model
7985 # will have to be found.
7987 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7988 frozenset([self.op.node_name]))
7990 return ResultWithJobs(jobs)
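# Illustrative note (an editor's sketch, not part of the upstream code): the
# "jobs" value built above is a list of single-opcode lists, one per primary
# instance on the evacuated node, for example (hypothetical instance names):
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)],
#   ]
# Each inner list becomes a separate job, so the per-instance migrations are
# scheduled and can succeed or fail independently of each other.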
7993 class TLMigrateInstance(Tasklet):
7994 """Tasklet class for instance migration.
7997 @ivar live: whether the migration will be done live or non-live;
7998 this variable is initialized only after CheckPrereq has run
7999 @type cleanup: boolean
8000 @ivar cleanup: Whether we clean up from a failed migration
8001 @type iallocator: string
8002 @ivar iallocator: The iallocator used to determine target_node
8003 @type target_node: string
8004 @ivar target_node: If given, the target_node to reallocate the instance to
8005 @type failover: boolean
8006 @ivar failover: Whether operation results in failover or migration
8007 @type fallback: boolean
8008 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
8010 @type ignore_consistency: boolean
8011 @ivar ignore_consistency: Whether we should ignore consistency between source and target node
8013 @type shutdown_timeout: int
8014 @ivar shutdown_timeout: In case of failover, timeout of the shutdown
8015 @type ignore_ipolicy: bool
8016 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8021 _MIGRATION_POLL_INTERVAL = 1 # seconds
8022 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8024 def __init__(self, lu, instance_name, cleanup=False,
8025 failover=False, fallback=False,
8026 ignore_consistency=False,
8027 allow_runtime_changes=True,
8028 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8029 ignore_ipolicy=False):
8030 """Initializes this class.
8033 Tasklet.__init__(self, lu)
8036 self.instance_name = instance_name
8037 self.cleanup = cleanup
8038 self.live = False # will be overridden later
8039 self.failover = failover
8040 self.fallback = fallback
8041 self.ignore_consistency = ignore_consistency
8042 self.shutdown_timeout = shutdown_timeout
8043 self.ignore_ipolicy = ignore_ipolicy
8044 self.allow_runtime_changes = allow_runtime_changes
8046 def CheckPrereq(self):
8047 """Check prerequisites.
8049 This checks that the instance is in the cluster.
8052 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8053 instance = self.cfg.GetInstanceInfo(instance_name)
8054 assert instance is not None
8055 self.instance = instance
8056 cluster = self.cfg.GetClusterInfo()
8058 if (not self.cleanup and
8059 not instance.admin_state == constants.ADMINST_UP and
8060 not self.failover and self.fallback):
8061 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8062 " switching to failover")
8063 self.failover = True
8065 if instance.disk_template not in constants.DTS_MIRRORED:
8070 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8071 " %s" % (instance.disk_template, text),
8074 if instance.disk_template in constants.DTS_EXT_MIRROR:
8075 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8077 if self.lu.op.iallocator:
8078 self._RunAllocator()
8080 # We set self.target_node as it is required by
8082 self.target_node = self.lu.op.target_node
8084 # Check that the target node is correct in terms of instance policy
8085 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8086 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8087 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8088 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8089 ignore=self.ignore_ipolicy)
8091 # self.target_node is already populated, either directly or by the
8093 target_node = self.target_node
8094 if self.target_node == instance.primary_node:
8095 raise errors.OpPrereqError("Cannot migrate instance %s"
8096 " to its primary (%s)" %
8097 (instance.name, instance.primary_node))
8099 if len(self.lu.tasklets) == 1:
8100 # It is safe to release locks only when we're the only tasklet
8102 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8103 keep=[instance.primary_node, self.target_node])
8106 secondary_nodes = instance.secondary_nodes
8107 if not secondary_nodes:
8108 raise errors.ConfigurationError("No secondary node but using"
8109 " %s disk template" %
8110 instance.disk_template)
8111 target_node = secondary_nodes[0]
8112 if self.lu.op.iallocator or (self.lu.op.target_node and
8113 self.lu.op.target_node != target_node):
8115 text = "failed over"
8118 raise errors.OpPrereqError("Instances with disk template %s cannot"
8119 " be %s to arbitrary nodes"
8120 " (neither an iallocator nor a target"
8121 " node can be passed)" %
8122 (instance.disk_template, text),
8124 nodeinfo = self.cfg.GetNodeInfo(target_node)
8125 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8126 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8127 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8128 ignore=self.ignore_ipolicy)
8130 i_be = cluster.FillBE(instance)
8132 # check memory requirements on the secondary node
8133 if (not self.cleanup and
8134 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8135 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8136 "migrating instance %s" %
8138 i_be[constants.BE_MINMEM],
8139 instance.hypervisor)
8141 self.lu.LogInfo("Not checking memory on the secondary node as"
8142 " instance will not be started")
8144 # check if failover must be forced instead of migration
8145 if (not self.cleanup and not self.failover and
8146 i_be[constants.BE_ALWAYS_FAILOVER]):
8148 self.lu.LogInfo("Instance configured to always failover; fallback"
8150 self.failover = True
8152 raise errors.OpPrereqError("This instance has been configured to"
8153 " always failover, please allow failover",
8156 # check bridge existence
8157 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8159 if not self.cleanup:
8160 _CheckNodeNotDrained(self.lu, target_node)
8161 if not self.failover:
8162 result = self.rpc.call_instance_migratable(instance.primary_node,
8164 if result.fail_msg and self.fallback:
8165 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8167 self.failover = True
8169 result.Raise("Can't migrate, please use failover",
8170 prereq=True, ecode=errors.ECODE_STATE)
8172 assert not (self.failover and self.cleanup)
8174 if not self.failover:
8175 if self.lu.op.live is not None and self.lu.op.mode is not None:
8176 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8177 " parameters are accepted",
8179 if self.lu.op.live is not None:
8181 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8183 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8184 # reset the 'live' parameter to None so that repeated
8185 # invocations of CheckPrereq do not raise an exception
8186 self.lu.op.live = None
8187 elif self.lu.op.mode is None:
8188 # read the default value from the hypervisor
8189 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8190 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8192 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8194 # Failover is never live
8197 if not (self.failover or self.cleanup):
8198 remote_info = self.rpc.call_instance_info(instance.primary_node,
8200 instance.hypervisor)
8201 remote_info.Raise("Error checking instance on node %s" %
8202 instance.primary_node)
8203 instance_running = bool(remote_info.payload)
8204 if instance_running:
8205 self.current_mem = int(remote_info.payload["memory"])
8207 def _RunAllocator(self):
8208 """Run the allocator based on input opcode.
8211 # FIXME: add a self.ignore_ipolicy option
8212 ial = IAllocator(self.cfg, self.rpc,
8213 mode=constants.IALLOCATOR_MODE_RELOC,
8214 name=self.instance_name,
8215 relocate_from=[self.instance.primary_node],
8218 ial.Run(self.lu.op.iallocator)
8221 raise errors.OpPrereqError("Can't compute nodes using"
8222 " iallocator '%s': %s" %
8223 (self.lu.op.iallocator, ial.info),
8225 if len(ial.result) != ial.required_nodes:
8226 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8227 " of nodes (%s), required %s" %
8228 (self.lu.op.iallocator, len(ial.result),
8229 ial.required_nodes), errors.ECODE_FAULT)
8230 self.target_node = ial.result[0]
8231 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8232 self.instance_name, self.lu.op.iallocator,
8233 utils.CommaJoin(ial.result))
8235 def _WaitUntilSync(self):
8236 """Poll with custom rpc for disk sync.
8238 This uses our own step-based rpc call.
8241 self.feedback_fn("* wait until resync is done")
8245 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8247 (self.instance.disks,
8250 for node, nres in result.items():
8251 nres.Raise("Cannot resync disks on node %s" % node)
8252 node_done, node_percent = nres.payload
8253 all_done = all_done and node_done
8254 if node_percent is not None:
8255 min_percent = min(min_percent, node_percent)
8257 if min_percent < 100:
8258 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8261 def _EnsureSecondary(self, node):
8262 """Demote a node to secondary.
8265 self.feedback_fn("* switching node %s to secondary mode" % node)
8267 for dev in self.instance.disks:
8268 self.cfg.SetDiskID(dev, node)
8270 result = self.rpc.call_blockdev_close(node, self.instance.name,
8271 self.instance.disks)
8272 result.Raise("Cannot change disk to secondary on node %s" % node)
8274 def _GoStandalone(self):
8275 """Disconnect from the network.
8278 self.feedback_fn("* changing into standalone mode")
8279 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8280 self.instance.disks)
8281 for node, nres in result.items():
8282 nres.Raise("Cannot disconnect disks node %s" % node)
8284 def _GoReconnect(self, multimaster):
8285 """Reconnect to the network.
8291 msg = "single-master"
8292 self.feedback_fn("* changing disks into %s mode" % msg)
8293 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8294 (self.instance.disks, self.instance),
8295 self.instance.name, multimaster)
8296 for node, nres in result.items():
8297 nres.Raise("Cannot change disks config on node %s" % node)
8299 def _ExecCleanup(self):
8300 """Try to cleanup after a failed migration.
8302 The cleanup is done by:
8303 - check that the instance is running only on one node
8304 (and update the config if needed)
8305 - change disks on its secondary node to secondary
8306 - wait until disks are fully synchronized
8307 - disconnect from the network
8308 - change disks into single-master mode
8309 - wait again until disks are fully synchronized
8312 instance = self.instance
8313 target_node = self.target_node
8314 source_node = self.source_node
8316 # check running on only one node
8317 self.feedback_fn("* checking where the instance actually runs"
8318 " (if this hangs, the hypervisor might be in"
8320 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8321 for node, result in ins_l.items():
8322 result.Raise("Can't contact node %s" % node)
8324 runningon_source = instance.name in ins_l[source_node].payload
8325 runningon_target = instance.name in ins_l[target_node].payload
8327 if runningon_source and runningon_target:
8328 raise errors.OpExecError("Instance seems to be running on two nodes,"
8329 " or the hypervisor is confused; you will have"
8330 " to ensure manually that it runs only on one"
8331 " and restart this operation")
8333 if not (runningon_source or runningon_target):
8334 raise errors.OpExecError("Instance does not seem to be running at all;"
8335 " in this case it's safer to repair by"
8336 " running 'gnt-instance stop' to ensure disk"
8337 " shutdown, and then restarting it")
8339 if runningon_target:
8340 # the migration has actually succeeded, we need to update the config
8341 self.feedback_fn("* instance running on secondary node (%s),"
8342 " updating config" % target_node)
8343 instance.primary_node = target_node
8344 self.cfg.Update(instance, self.feedback_fn)
8345 demoted_node = source_node
8347 self.feedback_fn("* instance confirmed to be running on its"
8348 " primary node (%s)" % source_node)
8349 demoted_node = target_node
8351 if instance.disk_template in constants.DTS_INT_MIRROR:
8352 self._EnsureSecondary(demoted_node)
8354 self._WaitUntilSync()
8355 except errors.OpExecError:
8356 # we ignore here errors, since if the device is standalone, it
8357 # won't be able to sync
8359 self._GoStandalone()
8360 self._GoReconnect(False)
8361 self._WaitUntilSync()
8363 self.feedback_fn("* done")
8365 def _RevertDiskStatus(self):
8366 """Try to revert the disk status after a failed migration.
8369 target_node = self.target_node
8370 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8374 self._EnsureSecondary(target_node)
8375 self._GoStandalone()
8376 self._GoReconnect(False)
8377 self._WaitUntilSync()
8378 except errors.OpExecError, err:
8379 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8380 " please try to recover the instance manually;"
8381 " error '%s'" % str(err))
8383 def _AbortMigration(self):
8384 """Call the hypervisor code to abort a started migration.
8387 instance = self.instance
8388 target_node = self.target_node
8389 source_node = self.source_node
8390 migration_info = self.migration_info
8392 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8396 abort_msg = abort_result.fail_msg
8398 logging.error("Aborting migration failed on target node %s: %s",
8399 target_node, abort_msg)
8400 # Don't raise an exception here, as we still have to try to revert the
8401 # disk status, even if this step failed.
8403 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8404 instance, False, self.live)
8405 abort_msg = abort_result.fail_msg
8407 logging.error("Aborting migration failed on source node %s: %s",
8408 source_node, abort_msg)
8410 def _ExecMigration(self):
8411 """Migrate an instance.
8413 The migrate is done by:
8414 - change the disks into dual-master mode
8415 - wait until disks are fully synchronized again
8416 - migrate the instance
8417 - change disks on the new secondary node (the old primary) to secondary
8418 - wait until disks are fully synchronized
8419 - change disks into single-master mode
8422 instance = self.instance
8423 target_node = self.target_node
8424 source_node = self.source_node
8426 # Check for hypervisor version mismatch and warn the user.
8427 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8428 None, [self.instance.hypervisor])
8429 for ninfo in nodeinfo.values():
8430 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8432 (_, _, (src_info, )) = nodeinfo[source_node].payload
8433 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8435 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8436 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8437 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8438 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8439 if src_version != dst_version:
8440 self.feedback_fn("* warning: hypervisor version mismatch between"
8441 " source (%s) and target (%s) node" %
8442 (src_version, dst_version))
8444 self.feedback_fn("* checking disk consistency between source and target")
8445 for (idx, dev) in enumerate(instance.disks):
8446 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8447 raise errors.OpExecError("Disk %s is degraded or not fully"
8448 " synchronized on target node,"
8449 " aborting migration" % idx)
8451 if self.current_mem > self.tgt_free_mem:
8452 if not self.allow_runtime_changes:
8453 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8454 " free memory to fit instance %s on target"
8455 " node %s (have %dMB, need %dMB)" %
8456 (instance.name, target_node,
8457 self.tgt_free_mem, self.current_mem))
8458 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8459 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8462 rpcres.Raise("Cannot modify instance runtime memory")
8464 # First get the migration information from the remote node
8465 result = self.rpc.call_migration_info(source_node, instance)
8466 msg = result.fail_msg
8468 log_err = ("Failed fetching source migration information from %s: %s" %
8470 logging.error(log_err)
8471 raise errors.OpExecError(log_err)
8473 self.migration_info = migration_info = result.payload
8475 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8476 # Then switch the disks to master/master mode
8477 self._EnsureSecondary(target_node)
8478 self._GoStandalone()
8479 self._GoReconnect(True)
8480 self._WaitUntilSync()
8482 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8483 result = self.rpc.call_accept_instance(target_node,
8486 self.nodes_ip[target_node])
8488 msg = result.fail_msg
8490 logging.error("Instance pre-migration failed, trying to revert"
8491 " disk status: %s", msg)
8492 self.feedback_fn("Pre-migration failed, aborting")
8493 self._AbortMigration()
8494 self._RevertDiskStatus()
8495 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8496 (instance.name, msg))
8498 self.feedback_fn("* migrating instance to %s" % target_node)
8499 result = self.rpc.call_instance_migrate(source_node, instance,
8500 self.nodes_ip[target_node],
8502 msg = result.fail_msg
8504 logging.error("Instance migration failed, trying to revert"
8505 " disk status: %s", msg)
8506 self.feedback_fn("Migration failed, aborting")
8507 self._AbortMigration()
8508 self._RevertDiskStatus()
8509 raise errors.OpExecError("Could not migrate instance %s: %s" %
8510 (instance.name, msg))
8512 self.feedback_fn("* starting memory transfer")
8513 last_feedback = time.time()
8515 result = self.rpc.call_instance_get_migration_status(source_node,
8517 msg = result.fail_msg
8518 ms = result.payload # MigrationStatus instance
8519 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8520 logging.error("Instance migration failed, trying to revert"
8521 " disk status: %s", msg)
8522 self.feedback_fn("Migration failed, aborting")
8523 self._AbortMigration()
8524 self._RevertDiskStatus()
8525 raise errors.OpExecError("Could not migrate instance %s: %s" %
8526 (instance.name, msg))
8528 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8529 self.feedback_fn("* memory transfer complete")
8532 if (utils.TimeoutExpired(last_feedback,
8533 self._MIGRATION_FEEDBACK_INTERVAL) and
8534 ms.transferred_ram is not None):
8535 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8536 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8537 last_feedback = time.time()
8539 time.sleep(self._MIGRATION_POLL_INTERVAL)
8541 result = self.rpc.call_instance_finalize_migration_src(source_node,
8545 msg = result.fail_msg
8547 logging.error("Instance migration succeeded, but finalization failed"
8548 " on the source node: %s", msg)
8549 raise errors.OpExecError("Could not finalize instance migration: %s" %
8552 instance.primary_node = target_node
8554 # distribute new instance config to the other nodes
8555 self.cfg.Update(instance, self.feedback_fn)
8557 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8561 msg = result.fail_msg
8563 logging.error("Instance migration succeeded, but finalization failed"
8564 " on the target node: %s", msg)
8565 raise errors.OpExecError("Could not finalize instance migration: %s" %
8568 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8569 self._EnsureSecondary(source_node)
8570 self._WaitUntilSync()
8571 self._GoStandalone()
8572 self._GoReconnect(False)
8573 self._WaitUntilSync()
8575 # If the instance's disk template is `rbd' and there was a successful
8576 # migration, unmap the device from the source node.
8577 if self.instance.disk_template == constants.DT_RBD:
8578 disks = _ExpandCheckDisks(instance, instance.disks)
8579 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8581 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8582 msg = result.fail_msg
8584 logging.error("Migration was successful, but couldn't unmap the"
8585 " block device %s on source node %s: %s",
8586 disk.iv_name, source_node, msg)
8587 logging.error("You need to unmap the device %s manually on %s",
8588 disk.iv_name, source_node)
8590 self.feedback_fn("* done")
8592 def _ExecFailover(self):
8593 """Failover an instance.
8595 The failover is done by shutting it down on its present node and
8596 starting it on the secondary.
8599 instance = self.instance
8600 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8602 source_node = instance.primary_node
8603 target_node = self.target_node
8605 if instance.admin_state == constants.ADMINST_UP:
8606 self.feedback_fn("* checking disk consistency between source and target")
8607 for (idx, dev) in enumerate(instance.disks):
8608 # for drbd, these are drbd over lvm
8609 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8611 if primary_node.offline:
8612 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8614 (primary_node.name, idx, target_node))
8615 elif not self.ignore_consistency:
8616 raise errors.OpExecError("Disk %s is degraded on target node,"
8617 " aborting failover" % idx)
8619 self.feedback_fn("* not checking disk consistency as instance is not"
8622 self.feedback_fn("* shutting down instance on source node")
8623 logging.info("Shutting down instance %s on node %s",
8624 instance.name, source_node)
8626 result = self.rpc.call_instance_shutdown(source_node, instance,
8627 self.shutdown_timeout)
8628 msg = result.fail_msg
8630 if self.ignore_consistency or primary_node.offline:
8631 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8632 " proceeding anyway; please make sure node"
8633 " %s is down; error details: %s",
8634 instance.name, source_node, source_node, msg)
8636 raise errors.OpExecError("Could not shutdown instance %s on"
8638 (instance.name, source_node, msg))
8640 self.feedback_fn("* deactivating the instance's disks on source node")
8641 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8642 raise errors.OpExecError("Can't shut down the instance's disks")
8644 instance.primary_node = target_node
8645 # distribute new instance config to the other nodes
8646 self.cfg.Update(instance, self.feedback_fn)
8648 # Only start the instance if it's marked as up
8649 if instance.admin_state == constants.ADMINST_UP:
8650 self.feedback_fn("* activating the instance's disks on target node %s" %
8652 logging.info("Starting instance %s on node %s",
8653 instance.name, target_node)
8655 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8656 ignore_secondaries=True)
8658 _ShutdownInstanceDisks(self.lu, instance)
8659 raise errors.OpExecError("Can't activate the instance's disks")
8661 self.feedback_fn("* starting the instance on the target node %s" %
8663 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8665 msg = result.fail_msg
8667 _ShutdownInstanceDisks(self.lu, instance)
8668 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8669 (instance.name, target_node, msg))
8671 def Exec(self, feedback_fn):
8672 """Perform the migration.
8675 self.feedback_fn = feedback_fn
8676 self.source_node = self.instance.primary_node
8678 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8679 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8680 self.target_node = self.instance.secondary_nodes[0]
8681 # Otherwise self.target_node has been populated either
8682 # directly, or through an iallocator.
8684 self.all_nodes = [self.source_node, self.target_node]
8685 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8686 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8689 feedback_fn("Failover instance %s" % self.instance.name)
8690 self._ExecFailover()
8692 feedback_fn("Migrating instance %s" % self.instance.name)
8695 return self._ExecCleanup()
8697 return self._ExecMigration()
8700 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8702 """Wrapper around L{_CreateBlockDevInner}.
8704 This method annotates the root device first.
8707 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8708 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8712 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8714 """Create a tree of block devices on a given node.
8716 If this device type has to be created on secondaries, create it and
8719 If not, just recurse to children keeping the same 'force' value.
8721 @attention: The device has to be annotated already.
8723 @param lu: the lu on whose behalf we execute
8724 @param node: the node on which to create the device
8725 @type instance: L{objects.Instance}
8726 @param instance: the instance which owns the device
8727 @type device: L{objects.Disk}
8728 @param device: the device to create
8729 @type force_create: boolean
8730 @param force_create: whether to force creation of this device; this
8731 will be changed to True whenever we find a device which has
8732 CreateOnSecondary() attribute
8733 @param info: the extra 'metadata' we should attach to the device
8734 (this will be represented as a LVM tag)
8735 @type force_open: boolean
8736 @param force_open: this parameter will be passed to the
8737 L{backend.BlockdevCreate} function where it specifies
8738 whether we run on primary or not, and it affects both
8739 the child assembly and the device's own Open() execution
8742 if device.CreateOnSecondary():
8746 for child in device.children:
8747 _CreateBlockDevInner(lu, node, instance, child, force_create,
8750 if not force_create:
8753 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
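# Illustrative note (an editor's sketch, not part of the upstream code): for a
# device tree such as a DRBD8 disk with two LV children (data and meta), the
# recursion above walks the children first; once a device in the tree reports
# CreateOnSecondary(), force_create stays True for the whole subtree below it,
# so those children are created even on nodes where only the parent strictly
# requires them. The parent device itself is finally created via
# _CreateSingleBlockDev only if force_create ended up True for it.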
8756 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8757 """Create a single block device on a given node.
8759 This will not recurse over children of the device, so they must be
8762 @param lu: the lu on whose behalf we execute
8763 @param node: the node on which to create the device
8764 @type instance: L{objects.Instance}
8765 @param instance: the instance which owns the device
8766 @type device: L{objects.Disk}
8767 @param device: the device to create
8768 @param info: the extra 'metadata' we should attach to the device
8769 (this will be represented as a LVM tag)
8770 @type force_open: boolean
8771 @param force_open: this parameter will be passed to the
8772 L{backend.BlockdevCreate} function where it specifies
8773 whether we run on primary or not, and it affects both
8774 the child assembly and the device's own Open() execution
8777 lu.cfg.SetDiskID(device, node)
8778 result = lu.rpc.call_blockdev_create(node, device, device.size,
8779 instance.name, force_open, info)
8780 result.Raise("Can't create block device %s on"
8781 " node %s for instance %s" % (device, node, instance.name))
8782 if device.physical_id is None:
8783 device.physical_id = result.payload
8786 def _GenerateUniqueNames(lu, exts):
8787 """Generate a suitable LV name.
8789 This will generate a logical volume name for the given instance.
8794 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8795 results.append("%s%s" % (new_id, val))
8799 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8800 iv_name, p_minor, s_minor):
8801 """Generate a drbd8 device complete with its children.
8804 assert len(vgnames) == len(names) == 2
8805 port = lu.cfg.AllocatePort()
8806 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8808 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8809 logical_id=(vgnames[0], names[0]),
8811 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8812 logical_id=(vgnames[1], names[1]),
8814 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8815 logical_id=(primary, secondary, port,
8818 children=[dev_data, dev_meta],
8819 iv_name=iv_name, params={})
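# Illustrative note (an editor's sketch, not part of the upstream code): the
# resulting object tree is a single LD_DRBD8 disk whose logical_id carries the
# endpoints and credentials (primary, secondary, port, p_minor, s_minor and the
# generated shared secret), with two LD_LV children: a data volume of the
# requested size and a small metadata volume of DRBD_META_SIZE.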
8823 _DISK_TEMPLATE_NAME_PREFIX = {
8824 constants.DT_PLAIN: "",
8825 constants.DT_RBD: ".rbd",
8829 _DISK_TEMPLATE_DEVICE_TYPE = {
8830 constants.DT_PLAIN: constants.LD_LV,
8831 constants.DT_FILE: constants.LD_FILE,
8832 constants.DT_SHARED_FILE: constants.LD_FILE,
8833 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8834 constants.DT_RBD: constants.LD_RBD,
8838 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8839 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8840 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8841 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8842 """Generate the entire disk layout for a given template type.
8845 #TODO: compute space requirements
8847 vgname = lu.cfg.GetVGName()
8848 disk_count = len(disk_info)
8851 if template_name == constants.DT_DISKLESS:
8853 elif template_name == constants.DT_DRBD8:
8854 if len(secondary_nodes) != 1:
8855 raise errors.ProgrammerError("Wrong template configuration")
8856 remote_node = secondary_nodes[0]
8857 minors = lu.cfg.AllocateDRBDMinor(
8858 [primary_node, remote_node] * len(disk_info), instance_name)
8860 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8862 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8865 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8866 for i in range(disk_count)]):
8867 names.append(lv_prefix + "_data")
8868 names.append(lv_prefix + "_meta")
8869 for idx, disk in enumerate(disk_info):
8870 disk_index = idx + base_index
8871 data_vg = disk.get(constants.IDISK_VG, vgname)
8872 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8873 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8874 disk[constants.IDISK_SIZE],
8876 names[idx * 2:idx * 2 + 2],
8877 "disk/%d" % disk_index,
8878 minors[idx * 2], minors[idx * 2 + 1])
8879 disk_dev.mode = disk[constants.IDISK_MODE]
8880 disks.append(disk_dev)
8883 raise errors.ProgrammerError("Wrong template configuration")
8885 if template_name == constants.DT_FILE:
8887 elif template_name == constants.DT_SHARED_FILE:
8888 _req_shr_file_storage()
8890 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8891 if name_prefix is None:
8894 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8895 (name_prefix, base_index + i)
8896 for i in range(disk_count)])
8898 if template_name == constants.DT_PLAIN:
8899 def logical_id_fn(idx, _, disk):
8900 vg = disk.get(constants.IDISK_VG, vgname)
8901 return (vg, names[idx])
8902 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8904 lambda _, disk_index, disk: (file_driver,
8905 "%s/disk%d" % (file_storage_dir,
8907 elif template_name == constants.DT_BLOCK:
8909 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8910 disk[constants.IDISK_ADOPT])
8911 elif template_name == constants.DT_RBD:
8912 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8914 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8916 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8918 for idx, disk in enumerate(disk_info):
8919 disk_index = idx + base_index
8920 size = disk[constants.IDISK_SIZE]
8921 feedback_fn("* disk %s, size %s" %
8922 (disk_index, utils.FormatUnit(size, "h")))
8923 disks.append(objects.Disk(dev_type=dev_type, size=size,
8924 logical_id=logical_id_fn(idx, disk_index, disk),
8925 iv_name="disk/%d" % disk_index,
8926 mode=disk[constants.IDISK_MODE],
8932 def _GetInstanceInfoText(instance):
8933 """Compute that text that should be added to the disk's metadata.
8936 return "originstname+%s" % instance.name
8939 def _CalcEta(time_taken, written, total_size):
8940 """Calculates the ETA based on size written and total size.
8942 @param time_taken: The time taken so far
8943 @param written: amount written so far
8944 @param total_size: The total size of data to be written
8945 @return: The remaining time in seconds
8948 avg_time = time_taken / float(written)
8949 return (total_size - written) * avg_time
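# Illustrative worked example (an editor's sketch, not part of the upstream
# code): if 256 MiB out of 1024 MiB were written in 64 seconds, the average
# rate is 64 / 256 = 0.25 s/MiB, so _CalcEta(64, 256, 1024) returns
# (1024 - 256) * 0.25 = 192.0 seconds remaining.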
8952 def _WipeDisks(lu, instance):
8953 """Wipes instance disks.
8955 @type lu: L{LogicalUnit}
8956 @param lu: the logical unit on whose behalf we execute
8957 @type instance: L{objects.Instance}
8958 @param instance: the instance whose disks we should wipe
8959 @return: the success of the wipe
8962 node = instance.primary_node
8964 for device in instance.disks:
8965 lu.cfg.SetDiskID(device, node)
8967 logging.info("Pause sync of instance %s disks", instance.name)
8968 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8969 (instance.disks, instance),
8972 for idx, success in enumerate(result.payload):
8974 logging.warn("pause-sync of instance %s for disks %d failed",
8978 for idx, device in enumerate(instance.disks):
8979 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8980 # MAX_WIPE_CHUNK at max
8981 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8982 constants.MIN_WIPE_CHUNK_PERCENT)
8983 # we _must_ make this an int, otherwise rounding errors will occur
8985 wipe_chunk_size = int(wipe_chunk_size)
8987 lu.LogInfo("* Wiping disk %d", idx)
8988 logging.info("Wiping disk %d for instance %s, node %s using"
8989 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8994 start_time = time.time()
8996 while offset < size:
8997 wipe_size = min(wipe_chunk_size, size - offset)
8998 logging.debug("Wiping disk %d, offset %s, chunk %s",
8999 idx, offset, wipe_size)
9000 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9002 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9003 (idx, offset, wipe_size))
9006 if now - last_output >= 60:
9007 eta = _CalcEta(now - start_time, offset, size)
9008 lu.LogInfo(" - done: %.1f%% ETA: %s" %
9009 (offset / float(size) * 100, utils.FormatSeconds(eta)))
9012 logging.info("Resume sync of instance %s disks", instance.name)
9014 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9015 (instance.disks, instance),
9018 for idx, success in enumerate(result.payload):
9020 lu.LogWarning("Resume sync of disk %d failed, please have a"
9021 " look at the status and troubleshoot the issue", idx)
9022 logging.warn("resume-sync of instance %s for disks %d failed",
9026 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9027 """Create all disks for an instance.
9029 This abstracts away some work from AddInstance.
9031 @type lu: L{LogicalUnit}
9032 @param lu: the logical unit on whose behalf we execute
9033 @type instance: L{objects.Instance}
9034 @param instance: the instance whose disks we should create
9036 @param to_skip: list of indices to skip
9037 @type target_node: string
9038 @param target_node: if passed, overrides the target node for creation
9040 @return: the success of the creation
9043 info = _GetInstanceInfoText(instance)
9044 if target_node is None:
9045 pnode = instance.primary_node
9046 all_nodes = instance.all_nodes
9051 if instance.disk_template in constants.DTS_FILEBASED:
9052 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9053 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9055 result.Raise("Failed to create directory '%s' on"
9056 " node %s" % (file_storage_dir, pnode))
9058 # Note: this needs to be kept in sync with adding of disks in
9059 # LUInstanceSetParams
9060 for idx, device in enumerate(instance.disks):
9061 if to_skip and idx in to_skip:
9063 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9065 for node in all_nodes:
9066 f_create = node == pnode
9067 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9070 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9071 """Remove all disks for an instance.
9073 This abstracts away some work from `AddInstance()` and
9074 `RemoveInstance()`. Note that in case some of the devices couldn't
9075 be removed, the removal will continue with the other ones (compare
9076 with `_CreateDisks()`).
9078 @type lu: L{LogicalUnit}
9079 @param lu: the logical unit on whose behalf we execute
9080 @type instance: L{objects.Instance}
9081 @param instance: the instance whose disks we should remove
9082 @type target_node: string
9083 @param target_node: used to override the node on which to remove the disks
9085 @return: the success of the removal
9088 logging.info("Removing block devices for instance %s", instance.name)
9091 ports_to_release = set()
9092 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9093 for (idx, device) in enumerate(anno_disks):
9095 edata = [(target_node, device)]
9097 edata = device.ComputeNodeTree(instance.primary_node)
9098 for node, disk in edata:
9099 lu.cfg.SetDiskID(disk, node)
9100 result = lu.rpc.call_blockdev_remove(node, disk)
9102 lu.LogWarning("Could not remove disk %s on node %s,"
9103 " continuing anyway: %s", idx, node, result.fail_msg)
9104 if not (result.offline and node != instance.primary_node):
9107 # if this is a DRBD disk, return its port to the pool
9108 if device.dev_type in constants.LDS_DRBD:
9109 ports_to_release.add(device.logical_id[2])
9111 if all_result or ignore_failures:
9112 for port in ports_to_release:
9113 lu.cfg.AddTcpUdpPort(port)
9115 if instance.disk_template == constants.DT_FILE:
9116 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9120 tgt = instance.primary_node
9121 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9123 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9124 file_storage_dir, instance.primary_node, result.fail_msg)
9130 def _ComputeDiskSizePerVG(disk_template, disks):
9131 """Compute disk size requirements in the volume group
9134 def _compute(disks, payload):
9135 """Universal algorithm.
9140 vgs[disk[constants.IDISK_VG]] = \
9141 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9145 # Required free disk space as a function of disk and swap space
9147 constants.DT_DISKLESS: {},
9148 constants.DT_PLAIN: _compute(disks, 0),
9149 # 128 MB are added for drbd metadata for each disk
9150 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9151 constants.DT_FILE: {},
9152 constants.DT_SHARED_FILE: {},
9155 if disk_template not in req_size_dict:
9156 raise errors.ProgrammerError("Disk template '%s' size requirement"
9157 " is unknown" % disk_template)
9159 return req_size_dict[disk_template]
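# Illustrative worked example (an editor's sketch, not part of the upstream
# code; "xenvg" is a hypothetical volume group name): for two DRBD8 disks of
# 1024 MiB and 512 MiB in volume group "xenvg", the per-VG requirement comes
# out as
#   {"xenvg": (1024 + 128) + (512 + 128)} == {"xenvg": 1792}
# i.e. each disk contributes its size plus the 128 MB DRBD metadata payload.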
9162 def _ComputeDiskSize(disk_template, disks):
9163 """Compute disk size requirements in the volume group
9166 # Required free disk space as a function of disk and swap space
9168 constants.DT_DISKLESS: None,
9169 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9170 # 128 MB are added for drbd metadata for each disk
9172 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9173 constants.DT_FILE: None,
9174 constants.DT_SHARED_FILE: 0,
9175 constants.DT_BLOCK: 0,
9176 constants.DT_RBD: 0,
9179 if disk_template not in req_size_dict:
9180 raise errors.ProgrammerError("Disk template '%s' size requirement"
9181 " is unknown" % disk_template)
9183 return req_size_dict[disk_template]
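# Illustrative worked example (an editor's sketch, not part of the upstream
# code): the same two disks of 1024 MiB and 512 MiB give a total requirement
# of None for DT_DISKLESS and DT_FILE, 1024 + 512 = 1536 for DT_PLAIN, and
# (1024 + 128) + (512 + 128) = 1792 for DT_DRBD8.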
9186 def _FilterVmNodes(lu, nodenames):
9187 """Filters out non-vm_capable nodes from a list.
9189 @type lu: L{LogicalUnit}
9190 @param lu: the logical unit for which we check
9191 @type nodenames: list
9192 @param nodenames: the list of nodes on which we should check
9194 @return: the list of vm-capable nodes
9197 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9198 return [name for name in nodenames if name not in vm_nodes]
9201 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9202 """Hypervisor parameter validation.
9204 This function abstracts the hypervisor parameter validation to be
9205 used in both instance create and instance modify.
9207 @type lu: L{LogicalUnit}
9208 @param lu: the logical unit for which we check
9209 @type nodenames: list
9210 @param nodenames: the list of nodes on which we should check
9211 @type hvname: string
9212 @param hvname: the name of the hypervisor we should use
9213 @type hvparams: dict
9214 @param hvparams: the parameters which we need to check
9215 @raise errors.OpPrereqError: if the parameters are not valid
9218 nodenames = _FilterVmNodes(lu, nodenames)
9220 cluster = lu.cfg.GetClusterInfo()
9221 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9223 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9224 for node in nodenames:
9228 info.Raise("Hypervisor parameter validation failed on node %s" % node)
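# Illustrative usage (an editor's sketch, not part of the upstream code): a
# logical unit validating hypervisor parameters for an instance might call
# something like
#   _CheckHVParams(self, list(instance.all_nodes), instance.hypervisor,
#                  new_hvparams)
# where new_hvparams is a hypothetical dict of the parameters being applied;
# the helper fills in the cluster defaults and fails the operation if
# validation does not pass on any of the vm-capable nodes.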
9231 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9232 """OS parameters validation.
9234 @type lu: L{LogicalUnit}
9235 @param lu: the logical unit for which we check
9236 @type required: boolean
9237 @param required: whether the validation should fail if the OS is not
9239 @type nodenames: list
9240 @param nodenames: the list of nodes on which we should check
9241 @type osname: string
9242 @param osname: the name of the OS we should use
9243 @type osparams: dict
9244 @param osparams: the parameters which we need to check
9245 @raise errors.OpPrereqError: if the parameters are not valid
9248 nodenames = _FilterVmNodes(lu, nodenames)
9249 result = lu.rpc.call_os_validate(nodenames, required, osname,
9250 [constants.OS_VALIDATE_PARAMETERS],
9252 for node, nres in result.items():
9253 # we don't check for offline cases since this should be run only
9254 # against the master node and/or an instance's nodes
9255 nres.Raise("OS Parameters validation failed on node %s" % node)
9256 if not nres.payload:
9257 lu.LogInfo("OS %s not found on node %s, validation skipped",
9261 class LUInstanceCreate(LogicalUnit):
9262 """Create an instance.
9265 HPATH = "instance-add"
9266 HTYPE = constants.HTYPE_INSTANCE
9269 def CheckArguments(self):
9273 # do not require name_check to ease forward/backward compatibility
9275 if self.op.no_install and self.op.start:
9276 self.LogInfo("No-installation mode selected, disabling startup")
9277 self.op.start = False
9278 # validate/normalize the instance name
9279 self.op.instance_name = \
9280 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9282 if self.op.ip_check and not self.op.name_check:
9283 # TODO: make the ip check more flexible and not depend on the name check
9284 raise errors.OpPrereqError("Cannot do IP address check without a name"
9285 " check", errors.ECODE_INVAL)
9287 # check nics' parameter names
9288 for nic in self.op.nics:
9289 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9291 # check disks. parameter names and consistent adopt/no-adopt strategy
9292 has_adopt = has_no_adopt = False
9293 for disk in self.op.disks:
9294 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9295 if constants.IDISK_ADOPT in disk:
9299 if has_adopt and has_no_adopt:
9300 raise errors.OpPrereqError("Either all disks are adopted or none is",
9303 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9304 raise errors.OpPrereqError("Disk adoption is not supported for the"
9305 " '%s' disk template" %
9306 self.op.disk_template,
9308 if self.op.iallocator is not None:
9309 raise errors.OpPrereqError("Disk adoption not allowed with an"
9310 " iallocator script", errors.ECODE_INVAL)
9311 if self.op.mode == constants.INSTANCE_IMPORT:
9312 raise errors.OpPrereqError("Disk adoption not allowed for"
9313 " instance import", errors.ECODE_INVAL)
9315 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9316 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9317 " but no 'adopt' parameter given" %
9318 self.op.disk_template,
9321 self.adopt_disks = has_adopt
9323 # instance name verification
9324 if self.op.name_check:
9325 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9326 self.op.instance_name = self.hostname1.name
9327 # used in CheckPrereq for ip ping check
9328 self.check_ip = self.hostname1.ip
9330 self.check_ip = None
9332 # file storage checks
9333 if (self.op.file_driver and
9334 not self.op.file_driver in constants.FILE_DRIVER):
9335 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9336 self.op.file_driver, errors.ECODE_INVAL)
9338 if self.op.disk_template == constants.DT_FILE:
9339 opcodes.RequireFileStorage()
9340 elif self.op.disk_template == constants.DT_SHARED_FILE:
9341 opcodes.RequireSharedFileStorage()
9343 ### Node/iallocator related checks
9344 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9346 if self.op.pnode is not None:
9347 if self.op.disk_template in constants.DTS_INT_MIRROR:
9348 if self.op.snode is None:
9349 raise errors.OpPrereqError("The networked disk templates need"
9350 " a mirror node", errors.ECODE_INVAL)
9352 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9354 self.op.snode = None
9356 self._cds = _GetClusterDomainSecret()
9358 if self.op.mode == constants.INSTANCE_IMPORT:
9359 # On import force_variant must be True, because if we forced it at
9360 # initial install, our only chance when importing it back is that it
9362 self.op.force_variant = True
9364 if self.op.no_install:
9365 self.LogInfo("No-installation mode has no effect during import")
9367 elif self.op.mode == constants.INSTANCE_CREATE:
9368 if self.op.os_type is None:
9369 raise errors.OpPrereqError("No guest OS specified",
9371 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9372 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9373 " installation" % self.op.os_type,
9375 if self.op.disk_template is None:
9376 raise errors.OpPrereqError("No disk template specified",
9379 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9380 # Check handshake to ensure both clusters have the same domain secret
9381 src_handshake = self.op.source_handshake
9382 if not src_handshake:
9383 raise errors.OpPrereqError("Missing source handshake",
9386 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9389 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9392 # Load and check source CA
9393 self.source_x509_ca_pem = self.op.source_x509_ca
9394 if not self.source_x509_ca_pem:
9395 raise errors.OpPrereqError("Missing source X509 CA",
9399 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9401 except OpenSSL.crypto.Error, err:
9402 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9403 (err, ), errors.ECODE_INVAL)
9405 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9406 if errcode is not None:
9407 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9410 self.source_x509_ca = cert
9412 src_instance_name = self.op.source_instance_name
9413 if not src_instance_name:
9414 raise errors.OpPrereqError("Missing source instance name",
9417 self.source_instance_name = \
9418 netutils.GetHostname(name=src_instance_name).name
9421 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9422 self.op.mode, errors.ECODE_INVAL)
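# Illustrative sketch, not part of the LU above: the adoption checks in
# CheckArguments enforce an all-or-nothing rule, i.e. either every disk carries
# the adopt key or none does, since mixing adopted and newly-created disks is
# not supported. The helper and the example disk dicts are invented.
def _ExampleAdoptionIsConsistent(disks):
  """Return True iff all disks are adopted or none is."""
  adopted = [constants.IDISK_ADOPT in disk for disk in disks]
  return all(adopted) or not any(adopted)

# _ExampleAdoptionIsConsistent([{constants.IDISK_SIZE: 1024,
#                                constants.IDISK_ADOPT: "lv_data"},
#                               {constants.IDISK_SIZE: 2048}])
# -> False, which is exactly the case rejected above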
9424 def ExpandNames(self):
9425 """ExpandNames for CreateInstance.
9427 Figure out the right locks for instance creation.
9430 self.needed_locks = {}
9432 instance_name = self.op.instance_name
9433 # this is just a preventive check, but someone might still add this
9434 # instance in the meantime, and creation will fail at lock-add time
9435 if instance_name in self.cfg.GetInstanceList():
9436 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9437 instance_name, errors.ECODE_EXISTS)
9439 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9441 if self.op.iallocator:
9442 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9443 # specifying a group on instance creation and then selecting nodes from
9445 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9446 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9448 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9449 nodelist = [self.op.pnode]
9450 if self.op.snode is not None:
9451 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9452 nodelist.append(self.op.snode)
9453 self.needed_locks[locking.LEVEL_NODE] = nodelist
9454 # Lock resources of instance's primary and secondary nodes (copy to
9455 # prevent accidental modification)
9456 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9458 # in case of import lock the source node too
9459 if self.op.mode == constants.INSTANCE_IMPORT:
9460 src_node = self.op.src_node
9461 src_path = self.op.src_path
9463 if src_path is None:
9464 self.op.src_path = src_path = self.op.instance_name
9466 if src_node is None:
9467 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9468 self.op.src_node = None
9469 if os.path.isabs(src_path):
9470 raise errors.OpPrereqError("Importing an instance from a path"
9471 " requires a source node option",
9474 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9475 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9476 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9477 if not os.path.isabs(src_path):
9478 self.op.src_path = src_path = \
9479 utils.PathJoin(constants.EXPORT_DIR, src_path)
9481 def _RunAllocator(self):
9482 """Run the allocator based on input opcode.
9485 nics = [n.ToDict() for n in self.nics]
9486 ial = IAllocator(self.cfg, self.rpc,
9487 mode=constants.IALLOCATOR_MODE_ALLOC,
9488 name=self.op.instance_name,
9489 disk_template=self.op.disk_template,
9492 vcpus=self.be_full[constants.BE_VCPUS],
9493 memory=self.be_full[constants.BE_MAXMEM],
9494 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9497 hypervisor=self.op.hypervisor,
9500 ial.Run(self.op.iallocator)
9503 raise errors.OpPrereqError("Can't compute nodes using"
9504 " iallocator '%s': %s" %
9505 (self.op.iallocator, ial.info),
9507 if len(ial.result) != ial.required_nodes:
9508 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9509 " of nodes (%s), required %s" %
9510 (self.op.iallocator, len(ial.result),
9511 ial.required_nodes), errors.ECODE_FAULT)
9512 self.op.pnode = ial.result[0]
9513 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9514 self.op.instance_name, self.op.iallocator,
9515 utils.CommaJoin(ial.result))
9516 if ial.required_nodes == 2:
9517 self.op.snode = ial.result[1]
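# Illustrative sketch, not the IAllocator protocol itself: the allocator is
# expected to return exactly 'required_nodes' names - one for non-mirrored
# templates, two (primary plus secondary) for mirrored ones - and the code
# above simply maps result[0]/result[1] onto pnode/snode. Helper invented.
def _ExamplePickAllocatedNodes(result, required_nodes):
  """Return (pnode, snode) from an allocator-style result list."""
  if len(result) != required_nodes:
    raise ValueError("expected %d node(s), got %d" %
                     (required_nodes, len(result)))
  snode = result[1] if required_nodes == 2 else None
  return (result[0], snode)

# _ExamplePickAllocatedNodes(["node1.example.com", "node2.example.com"], 2)
# -> ("node1.example.com", "node2.example.com")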
9519 def BuildHooksEnv(self):
9522 This runs on master, primary and secondary nodes of the instance.
9526 "ADD_MODE": self.op.mode,
9528 if self.op.mode == constants.INSTANCE_IMPORT:
9529 env["SRC_NODE"] = self.op.src_node
9530 env["SRC_PATH"] = self.op.src_path
9531 env["SRC_IMAGES"] = self.src_images
9533 env.update(_BuildInstanceHookEnv(
9534 name=self.op.instance_name,
9535 primary_node=self.op.pnode,
9536 secondary_nodes=self.secondaries,
9537 status=self.op.start,
9538 os_type=self.op.os_type,
9539 minmem=self.be_full[constants.BE_MINMEM],
9540 maxmem=self.be_full[constants.BE_MAXMEM],
9541 vcpus=self.be_full[constants.BE_VCPUS],
9542 nics=_NICListToTuple(self, self.nics),
9543 disk_template=self.op.disk_template,
9544 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9545 for d in self.disks],
9548 hypervisor_name=self.op.hypervisor,
9554 def BuildHooksNodes(self):
9555 """Build hooks nodes.
9558 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9561 def _ReadExportInfo(self):
9562 """Reads the export information from disk.
9564 It will override the opcode source node and path with the actual
9565 information, if these two were not specified before.
9567 @return: the export information
9570 assert self.op.mode == constants.INSTANCE_IMPORT
9572 src_node = self.op.src_node
9573 src_path = self.op.src_path
9575 if src_node is None:
9576 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9577 exp_list = self.rpc.call_export_list(locked_nodes)
9579 for node in exp_list:
9580 if exp_list[node].fail_msg:
9582 if src_path in exp_list[node].payload:
9584 self.op.src_node = src_node = node
9585 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9589 raise errors.OpPrereqError("No export found for relative path %s" %
9590 src_path, errors.ECODE_INVAL)
9592 _CheckNodeOnline(self, src_node)
9593 result = self.rpc.call_export_info(src_node, src_path)
9594 result.Raise("No export or invalid export found in dir %s" % src_path)
9596 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9597 if not export_info.has_section(constants.INISECT_EXP):
9598 raise errors.ProgrammerError("Corrupted export config",
9599 errors.ECODE_ENVIRON)
9601 ei_version = export_info.get(constants.INISECT_EXP, "version")
9602 if (int(ei_version) != constants.EXPORT_VERSION):
9603 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9604 (ei_version, constants.EXPORT_VERSION),
9605 errors.ECODE_ENVIRON)
9608 def _ReadExportParams(self, einfo):
9609 """Use export parameters as defaults.
9611 In case the opcode doesn't specify (as in override) some instance
9612 parameters, then try to use them from the export information, if that declares them.
9616 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9618 if self.op.disk_template is None:
9619 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9620 self.op.disk_template = einfo.get(constants.INISECT_INS,
9622 if self.op.disk_template not in constants.DISK_TEMPLATES:
9623 raise errors.OpPrereqError("Disk template specified in configuration"
9624 " file is not one of the allowed values:"
9625 " %s" % " ".join(constants.DISK_TEMPLATES))
9627 raise errors.OpPrereqError("No disk template specified and the export"
9628 " is missing the disk_template information",
9631 if not self.op.disks:
9633 # TODO: import the disk iv_name too
9634 for idx in range(constants.MAX_DISKS):
9635 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9636 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9637 disks.append({constants.IDISK_SIZE: disk_sz})
9638 self.op.disks = disks
9639 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9640 raise errors.OpPrereqError("No disk info specified and the export"
9641 " is missing the disk information",
9644 if not self.op.nics:
9646 for idx in range(constants.MAX_NICS):
9647 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9649 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9650 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9657 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9658 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9660 if (self.op.hypervisor is None and
9661 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9662 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9664 if einfo.has_section(constants.INISECT_HYP):
9665 # use the export parameters but do not override the ones
9666 # specified by the user
9667 for name, value in einfo.items(constants.INISECT_HYP):
9668 if name not in self.op.hvparams:
9669 self.op.hvparams[name] = value
9671 if einfo.has_section(constants.INISECT_BEP):
9672 # use the parameters, without overriding
9673 for name, value in einfo.items(constants.INISECT_BEP):
9674 if name not in self.op.beparams:
9675 self.op.beparams[name] = value
9676 # Compatibility for the old "memory" be param
9677 if name == constants.BE_MEMORY:
9678 if constants.BE_MAXMEM not in self.op.beparams:
9679 self.op.beparams[constants.BE_MAXMEM] = value
9680 if constants.BE_MINMEM not in self.op.beparams:
9681 self.op.beparams[constants.BE_MINMEM] = value
9683 # try to read the parameters old style, from the main section
9684 for name in constants.BES_PARAMETERS:
9685 if (name not in self.op.beparams and
9686 einfo.has_option(constants.INISECT_INS, name)):
9687 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9689 if einfo.has_section(constants.INISECT_OSP):
9690 # use the parameters, without overriding
9691 for name, value in einfo.items(constants.INISECT_OSP):
9692 if name not in self.op.osparams:
9693 self.op.osparams[name] = value
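# Illustrative sketch, not the real export format: _ReadExportParams treats the
# export information as an INI-style file and only fills in options that the
# opcode left unset. The section/option names mirror the lookups above, but the
# concrete file content and the helper are invented.
def _ExampleDiskTemplateFromExport(export_text, op_disk_template):
  """Return the disk template, preferring the opcode value over the export."""
  import ConfigParser
  import StringIO
  parser = ConfigParser.SafeConfigParser()
  parser.readfp(StringIO.StringIO(export_text))
  if op_disk_template is not None:
    return op_disk_template
  if parser.has_option("instance", "disk_template"):
    return parser.get("instance", "disk_template")
  raise ValueError("export is missing the disk_template information")

# _ExampleDiskTemplateFromExport("[instance]\ndisk_template = drbd\n", None)
# -> "drbd"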
9695 def _RevertToDefaults(self, cluster):
9696 """Revert the instance parameters to the default values.
9700 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9701 for name in self.op.hvparams.keys():
9702 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9703 del self.op.hvparams[name]
9705 be_defs = cluster.SimpleFillBE({})
9706 for name in self.op.beparams.keys():
9707 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9708 del self.op.beparams[name]
9710 nic_defs = cluster.SimpleFillNIC({})
9711 for nic in self.op.nics:
9712 for name in constants.NICS_PARAMETERS:
9713 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9716 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9717 for name in self.op.osparams.keys():
9718 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9719 del self.op.osparams[name]
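# Illustrative sketch, not used by the LU: with identify_defaults the LU strips
# every explicitly-given parameter whose value merely repeats the cluster
# default, so the instance keeps following the cluster-wide value instead of
# freezing a private copy of it. Helper and example values are invented.
def _ExampleDropDefaultValues(params, defaults):
  """Return 'params' without the entries that just repeat 'defaults'."""
  return dict((name, value) for (name, value) in params.items()
              if name not in defaults or defaults[name] != value)

# _ExampleDropDefaultValues({"maxmem": 1024, "vcpus": 4},
#                           {"maxmem": 1024, "vcpus": 1})
# -> {"vcpus": 4}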
9721 def _CalculateFileStorageDir(self):
9722 """Calculate final instance file storage dir.
9725 # file storage dir calculation/check
9726 self.instance_file_storage_dir = None
9727 if self.op.disk_template in constants.DTS_FILEBASED:
9728 # build the full file storage dir path
9731 if self.op.disk_template == constants.DT_SHARED_FILE:
9732 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9734 get_fsd_fn = self.cfg.GetFileStorageDir
9736 cfg_storagedir = get_fsd_fn()
9737 if not cfg_storagedir:
9738 raise errors.OpPrereqError("Cluster file storage dir not defined")
9739 joinargs.append(cfg_storagedir)
9741 if self.op.file_storage_dir is not None:
9742 joinargs.append(self.op.file_storage_dir)
9744 joinargs.append(self.op.instance_name)
9746 # pylint: disable=W0142
9747 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
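# Illustrative sketch of the path construction above; the directory names are
# made up. For file-based templates the final directory is the cluster storage
# directory, then the optional per-opcode subdirectory, then the instance name,
# joined in that order.
def _ExampleFileStorageDir(cluster_dir, op_subdir, instance_name):
  """Return the instance file storage directory built from its pieces."""
  joinargs = [cluster_dir]
  if op_subdir is not None:
    joinargs.append(op_subdir)
  joinargs.append(instance_name)
  return utils.PathJoin(*joinargs)

# _ExampleFileStorageDir("/srv/ganeti/file-storage", None, "inst1.example.com")
# -> "/srv/ganeti/file-storage/inst1.example.com"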
9749 def CheckPrereq(self): # pylint: disable=R0914
9750 """Check prerequisites.
9753 self._CalculateFileStorageDir()
9755 if self.op.mode == constants.INSTANCE_IMPORT:
9756 export_info = self._ReadExportInfo()
9757 self._ReadExportParams(export_info)
9758 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9760 self._old_instance_name = None
9762 if (not self.cfg.GetVGName() and
9763 self.op.disk_template not in constants.DTS_NOT_LVM):
9764 raise errors.OpPrereqError("Cluster does not support lvm-based"
9765 " instances", errors.ECODE_STATE)
9767 if (self.op.hypervisor is None or
9768 self.op.hypervisor == constants.VALUE_AUTO):
9769 self.op.hypervisor = self.cfg.GetHypervisorType()
9771 cluster = self.cfg.GetClusterInfo()
9772 enabled_hvs = cluster.enabled_hypervisors
9773 if self.op.hypervisor not in enabled_hvs:
9774 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9775 " cluster (%s)" % (self.op.hypervisor,
9776 ",".join(enabled_hvs)),
9779 # Check tag validity
9780 for tag in self.op.tags:
9781 objects.TaggableObject.ValidateTag(tag)
9783 # check hypervisor parameter syntax (locally)
9784 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9785 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9787 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9788 hv_type.CheckParameterSyntax(filled_hvp)
9789 self.hv_full = filled_hvp
9790 # check that we don't specify global parameters on an instance
9791 _CheckGlobalHvParams(self.op.hvparams)
9793 # fill and remember the beparams dict
9794 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9795 for param, value in self.op.beparams.iteritems():
9796 if value == constants.VALUE_AUTO:
9797 self.op.beparams[param] = default_beparams[param]
9798 objects.UpgradeBeParams(self.op.beparams)
9799 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9800 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9802 # build os parameters
9803 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9805 # now that hvp/bep are in final format, let's reset to defaults, if told to do so
9807 if self.op.identify_defaults:
9808 self._RevertToDefaults(cluster)
9812 for idx, nic in enumerate(self.op.nics):
9813 nic_mode_req = nic.get(constants.INIC_MODE, None)
9814 nic_mode = nic_mode_req
9815 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9816 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9818 # in routed mode, for the first nic, the default ip is 'auto'
9819 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9820 default_ip_mode = constants.VALUE_AUTO
9822 default_ip_mode = constants.VALUE_NONE
9824 # ip validity checks
9825 ip = nic.get(constants.INIC_IP, default_ip_mode)
9826 if ip is None or ip.lower() == constants.VALUE_NONE:
9828 elif ip.lower() == constants.VALUE_AUTO:
9829 if not self.op.name_check:
9830 raise errors.OpPrereqError("IP address set to auto but name checks"
9831 " have been skipped",
9833 nic_ip = self.hostname1.ip
9835 if not netutils.IPAddress.IsValid(ip):
9836 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9840 # TODO: check the ip address for uniqueness
9841 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9842 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9845 # MAC address verification
9846 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9847 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9848 mac = utils.NormalizeAndValidateMac(mac)
9851 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9852 except errors.ReservationError:
9853 raise errors.OpPrereqError("MAC address %s already in use"
9854 " in cluster" % mac,
9855 errors.ECODE_NOTUNIQUE)
9857 # Build nic parameters
9858 link = nic.get(constants.INIC_LINK, None)
9859 if link == constants.VALUE_AUTO:
9860 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9863 nicparams[constants.NIC_MODE] = nic_mode
9865 nicparams[constants.NIC_LINK] = link
9867 check_params = cluster.SimpleFillNIC(nicparams)
9868 objects.NIC.CheckParameterSyntax(check_params)
9869 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9871 # disk checks/pre-build
9872 default_vg = self.cfg.GetVGName()
9874 for disk in self.op.disks:
9875 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9876 if mode not in constants.DISK_ACCESS_SET:
9877 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9878 mode, errors.ECODE_INVAL)
9879 size = disk.get(constants.IDISK_SIZE, None)
9881 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9884 except (TypeError, ValueError):
9885 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9888 data_vg = disk.get(constants.IDISK_VG, default_vg)
9890 constants.IDISK_SIZE: size,
9891 constants.IDISK_MODE: mode,
9892 constants.IDISK_VG: data_vg,
9894 if constants.IDISK_METAVG in disk:
9895 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9896 if constants.IDISK_ADOPT in disk:
9897 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9898 self.disks.append(new_disk)
9900 if self.op.mode == constants.INSTANCE_IMPORT:
9902 for idx in range(len(self.disks)):
9903 option = "disk%d_dump" % idx
9904 if export_info.has_option(constants.INISECT_INS, option):
9905 # FIXME: are the old os-es, disk sizes, etc. useful?
9906 export_name = export_info.get(constants.INISECT_INS, option)
9907 image = utils.PathJoin(self.op.src_path, export_name)
9908 disk_images.append(image)
9910 disk_images.append(False)
9912 self.src_images = disk_images
9914 if self.op.instance_name == self._old_instance_name:
9915 for idx, nic in enumerate(self.nics):
9916 if nic.mac == constants.VALUE_AUTO:
9917 nic_mac_ini = "nic%d_mac" % idx
9918 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9920 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9922 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9923 if self.op.ip_check:
9924 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9925 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9926 (self.check_ip, self.op.instance_name),
9927 errors.ECODE_NOTUNIQUE)
9929 #### mac address generation
9930 # By generating the mac address here, both the allocator and the hooks get
9931 # the real final mac address rather than the 'auto' or 'generate' value.
9932 # There is a race condition between the generation and the instance object
9933 # creation, which means that we know the mac is valid now, but we're not
9934 # sure it will be when we actually add the instance. If things go bad
9935 # adding the instance will abort because of a duplicate mac, and the
9936 # creation job will fail.
9937 for nic in self.nics:
9938 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9939 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9943 if self.op.iallocator is not None:
9944 self._RunAllocator()
9946 # Release all unneeded node locks
9947 _ReleaseLocks(self, locking.LEVEL_NODE,
9948 keep=filter(None, [self.op.pnode, self.op.snode,
9950 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9951 keep=filter(None, [self.op.pnode, self.op.snode,
9954 #### node related checks
9956 # check primary node
9957 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9958 assert self.pnode is not None, \
9959 "Cannot retrieve locked node %s" % self.op.pnode
9961 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9962 pnode.name, errors.ECODE_STATE)
9964 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9965 pnode.name, errors.ECODE_STATE)
9966 if not pnode.vm_capable:
9967 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9968 " '%s'" % pnode.name, errors.ECODE_STATE)
9970 self.secondaries = []
9972 # mirror node verification
9973 if self.op.disk_template in constants.DTS_INT_MIRROR:
9974 if self.op.snode == pnode.name:
9975 raise errors.OpPrereqError("The secondary node cannot be the"
9976 " primary node", errors.ECODE_INVAL)
9977 _CheckNodeOnline(self, self.op.snode)
9978 _CheckNodeNotDrained(self, self.op.snode)
9979 _CheckNodeVmCapable(self, self.op.snode)
9980 self.secondaries.append(self.op.snode)
9982 snode = self.cfg.GetNodeInfo(self.op.snode)
9983 if pnode.group != snode.group:
9984 self.LogWarning("The primary and secondary nodes are in two"
9985 " different node groups; the disk parameters"
9986 " from the first disk's node group will be"
9989 nodenames = [pnode.name] + self.secondaries
9991 # Verify instance specs
9992 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9994 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9995 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9996 constants.ISPEC_DISK_COUNT: len(self.disks),
9997 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9998 constants.ISPEC_NIC_COUNT: len(self.nics),
9999 constants.ISPEC_SPINDLE_USE: spindle_use,
10002 group_info = self.cfg.GetNodeGroup(pnode.group)
10003 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
10004 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10005 if not self.op.ignore_ipolicy and res:
10006 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10007 " policy: %s") % (pnode.group,
10008 utils.CommaJoin(res)),
10009 errors.ECODE_INVAL)
10011 if not self.adopt_disks:
10012 if self.op.disk_template == constants.DT_RBD:
10013 # _CheckRADOSFreeSpace() is just a placeholder.
10014 # Any function that checks prerequisites can be placed here.
10015 # Check if there is enough space on the RADOS cluster.
10016 _CheckRADOSFreeSpace()
10018 # Check lv size requirements, if not adopting
10019 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10020 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10022 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10023 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10024 disk[constants.IDISK_ADOPT])
10025 for disk in self.disks])
10026 if len(all_lvs) != len(self.disks):
10027 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10028 errors.ECODE_INVAL)
10029 for lv_name in all_lvs:
10031 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10032 # to ReserveLV use the same syntax
10033 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10034 except errors.ReservationError:
10035 raise errors.OpPrereqError("LV named %s used by another instance" %
10036 lv_name, errors.ECODE_NOTUNIQUE)
10038 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10039 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10041 node_lvs = self.rpc.call_lv_list([pnode.name],
10042 vg_names.payload.keys())[pnode.name]
10043 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10044 node_lvs = node_lvs.payload
10046 delta = all_lvs.difference(node_lvs.keys())
10048 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10049 utils.CommaJoin(delta),
10050 errors.ECODE_INVAL)
10051 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10053 raise errors.OpPrereqError("Online logical volumes found, cannot"
10054 " adopt: %s" % utils.CommaJoin(online_lvs),
10055 errors.ECODE_STATE)
10056 # update the size of disk based on what is found
10057 for dsk in self.disks:
10058 dsk[constants.IDISK_SIZE] = \
10059 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10060 dsk[constants.IDISK_ADOPT])][0]))
10062 elif self.op.disk_template == constants.DT_BLOCK:
10063 # Normalize and de-duplicate device paths
10064 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10065 for disk in self.disks])
10066 if len(all_disks) != len(self.disks):
10067 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10068 errors.ECODE_INVAL)
10069 baddisks = [d for d in all_disks
10070 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10072 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10073 " cannot be adopted" %
10074 (", ".join(baddisks),
10075 constants.ADOPTABLE_BLOCKDEV_ROOT),
10076 errors.ECODE_INVAL)
10078 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10079 list(all_disks))[pnode.name]
10080 node_disks.Raise("Cannot get block device information from node %s" %
10082 node_disks = node_disks.payload
10083 delta = all_disks.difference(node_disks.keys())
10085 raise errors.OpPrereqError("Missing block device(s): %s" %
10086 utils.CommaJoin(delta),
10087 errors.ECODE_INVAL)
10088 for dsk in self.disks:
10089 dsk[constants.IDISK_SIZE] = \
10090 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10092 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10094 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10095 # check OS parameters (remotely)
10096 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10098 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10100 # memory check on primary node
10101 #TODO(dynmem): use MINMEM for checking
10103 _CheckNodeFreeMemory(self, self.pnode.name,
10104 "creating instance %s" % self.op.instance_name,
10105 self.be_full[constants.BE_MAXMEM],
10106 self.op.hypervisor)
10108 self.dry_run_result = list(nodenames)
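# Illustrative sketch of the spec-versus-policy comparison done above; the flat
# min/max dicts are a deliberate simplification and not the real ipolicy
# structure. It only shows the idea of collecting the violated keys and
# refusing the allocation unless ignore_ipolicy is set.
def _ExampleSpecViolations(spec, minimums, maximums):
  """Return the list of spec keys falling outside the allowed ranges."""
  violations = []
  for key, value in spec.items():
    if key in minimums and value < minimums[key]:
      violations.append(key)
    elif key in maximums and value > maximums[key]:
      violations.append(key)
  return violations

# _ExampleSpecViolations({"memory-size": 128, "cpu-count": 2},
#                        {"memory-size": 256}, {"cpu-count": 8})
# -> ["memory-size"]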
10110 def Exec(self, feedback_fn):
10111 """Create and add the instance to the cluster.
10114 instance = self.op.instance_name
10115 pnode_name = self.pnode.name
10117 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10118 self.owned_locks(locking.LEVEL_NODE)), \
10119 "Node locks differ from node resource locks"
10121 ht_kind = self.op.hypervisor
10122 if ht_kind in constants.HTS_REQ_PORT:
10123 network_port = self.cfg.AllocatePort()
10125 network_port = None
10127 # This is ugly, but we have a chicken-and-egg problem here
10128 # We can only take the group disk parameters, as the instance
10129 # has no disks yet (we are generating them right here).
10130 node = self.cfg.GetNodeInfo(pnode_name)
10131 nodegroup = self.cfg.GetNodeGroup(node.group)
10132 disks = _GenerateDiskTemplate(self,
10133 self.op.disk_template,
10134 instance, pnode_name,
10137 self.instance_file_storage_dir,
10138 self.op.file_driver,
10141 self.cfg.GetGroupDiskParams(nodegroup))
10143 iobj = objects.Instance(name=instance, os=self.op.os_type,
10144 primary_node=pnode_name,
10145 nics=self.nics, disks=disks,
10146 disk_template=self.op.disk_template,
10147 admin_state=constants.ADMINST_DOWN,
10148 network_port=network_port,
10149 beparams=self.op.beparams,
10150 hvparams=self.op.hvparams,
10151 hypervisor=self.op.hypervisor,
10152 osparams=self.op.osparams,
10156 for tag in self.op.tags:
10159 if self.adopt_disks:
10160 if self.op.disk_template == constants.DT_PLAIN:
10161 # rename LVs to the newly-generated names; we need to construct
10162 # 'fake' LV disks with the old data, plus the new unique_id
10163 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10165 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10166 rename_to.append(t_dsk.logical_id)
10167 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10168 self.cfg.SetDiskID(t_dsk, pnode_name)
10169 result = self.rpc.call_blockdev_rename(pnode_name,
10170 zip(tmp_disks, rename_to))
10171 result.Raise("Failed to rename adoped LVs")
10173 feedback_fn("* creating instance disks...")
10175 _CreateDisks(self, iobj)
10176 except errors.OpExecError:
10177 self.LogWarning("Device creation failed, reverting...")
10179 _RemoveDisks(self, iobj)
10181 self.cfg.ReleaseDRBDMinors(instance)
10184 feedback_fn("adding instance %s to cluster config" % instance)
10186 self.cfg.AddInstance(iobj, self.proc.GetECId())
10188 # Declare that we don't want to remove the instance lock anymore, as we've
10189 # added the instance to the config
10190 del self.remove_locks[locking.LEVEL_INSTANCE]
10192 if self.op.mode == constants.INSTANCE_IMPORT:
10193 # Release unused nodes
10194 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10196 # Release all nodes
10197 _ReleaseLocks(self, locking.LEVEL_NODE)
10200 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10201 feedback_fn("* wiping instance disks...")
10203 _WipeDisks(self, iobj)
10204 except errors.OpExecError, err:
10205 logging.exception("Wiping disks failed")
10206 self.LogWarning("Wiping instance disks failed (%s)", err)
10210 # Something is already wrong with the disks, don't do anything else
10212 elif self.op.wait_for_sync:
10213 disk_abort = not _WaitForSync(self, iobj)
10214 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10215 # make sure the disks are not degraded (still sync-ing is ok)
10216 feedback_fn("* checking mirrors status")
10217 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10222 _RemoveDisks(self, iobj)
10223 self.cfg.RemoveInstance(iobj.name)
10224 # Make sure the instance lock gets removed
10225 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10226 raise errors.OpExecError("There are some degraded disks for"
10229 # Release all node resource locks
10230 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10232 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10233 # we need to set the disk IDs to the primary node, since the
10234 # preceding code might or might not have done it, depending on
10235 # disk template and other options
10236 for disk in iobj.disks:
10237 self.cfg.SetDiskID(disk, pnode_name)
10238 if self.op.mode == constants.INSTANCE_CREATE:
10239 if not self.op.no_install:
10240 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10241 not self.op.wait_for_sync)
10243 feedback_fn("* pausing disk sync to install instance OS")
10244 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10247 for idx, success in enumerate(result.payload):
10249 logging.warn("pause-sync of instance %s for disk %d failed",
10252 feedback_fn("* running the instance OS create scripts...")
10253 # FIXME: pass debug option from opcode to backend
10255 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10256 self.op.debug_level)
10258 feedback_fn("* resuming disk sync")
10259 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10262 for idx, success in enumerate(result.payload):
10264 logging.warn("resume-sync of instance %s for disk %d failed",
10267 os_add_result.Raise("Could not add os for instance %s"
10268 " on node %s" % (instance, pnode_name))
10271 if self.op.mode == constants.INSTANCE_IMPORT:
10272 feedback_fn("* running the instance OS import scripts...")
10276 for idx, image in enumerate(self.src_images):
10280 # FIXME: pass debug option from opcode to backend
10281 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10282 constants.IEIO_FILE, (image, ),
10283 constants.IEIO_SCRIPT,
10284 (iobj.disks[idx], idx),
10286 transfers.append(dt)
10289 masterd.instance.TransferInstanceData(self, feedback_fn,
10290 self.op.src_node, pnode_name,
10291 self.pnode.secondary_ip,
10293 if not compat.all(import_result):
10294 self.LogWarning("Some disks for instance %s on node %s were not"
10295 " imported successfully" % (instance, pnode_name))
10297 rename_from = self._old_instance_name
10299 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10300 feedback_fn("* preparing remote import...")
10301 # The source cluster will stop the instance before attempting to make
10302 # a connection. In some cases stopping an instance can take a long
10303 # time, hence the shutdown timeout is added to the connection timeout.
10305 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10306 self.op.source_shutdown_timeout)
10307 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10309 assert iobj.primary_node == self.pnode.name
10311 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10312 self.source_x509_ca,
10313 self._cds, timeouts)
10314 if not compat.all(disk_results):
10315 # TODO: Should the instance still be started, even if some disks
10316 # failed to import (valid for local imports, too)?
10317 self.LogWarning("Some disks for instance %s on node %s were not"
10318 " imported successfully" % (instance, pnode_name))
10320 rename_from = self.source_instance_name
10323 # also checked in the prereq part
10324 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10327 # Run rename script on newly imported instance
10328 assert iobj.name == instance
10329 feedback_fn("Running rename script for %s" % instance)
10330 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10332 self.op.debug_level)
10333 if result.fail_msg:
10334 self.LogWarning("Failed to run rename script for %s on node"
10335 " %s: %s" % (instance, pnode_name, result.fail_msg))
10337 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10340 iobj.admin_state = constants.ADMINST_UP
10341 self.cfg.Update(iobj, feedback_fn)
10342 logging.info("Starting instance %s on node %s", instance, pnode_name)
10343 feedback_fn("* starting instance...")
10344 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10346 result.Raise("Could not start instance")
10348 return list(iobj.all_nodes)
10351 def _CheckRADOSFreeSpace():
10352 """Compute disk size requirements inside the RADOS cluster.
10355 # For the RADOS cluster we assume there is always enough space.
10359 class LUInstanceConsole(NoHooksLU):
10360 """Connect to an instance's console.
10362 This is somewhat special in that it returns the command line that
10363 you need to run on the master node in order to connect to the console.
10369 def ExpandNames(self):
10370 self.share_locks = _ShareAll()
10371 self._ExpandAndLockInstance()
10373 def CheckPrereq(self):
10374 """Check prerequisites.
10376 This checks that the instance is in the cluster.
10379 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10380 assert self.instance is not None, \
10381 "Cannot retrieve locked instance %s" % self.op.instance_name
10382 _CheckNodeOnline(self, self.instance.primary_node)
10384 def Exec(self, feedback_fn):
10385 """Connect to the console of an instance
10388 instance = self.instance
10389 node = instance.primary_node
10391 node_insts = self.rpc.call_instance_list([node],
10392 [instance.hypervisor])[node]
10393 node_insts.Raise("Can't get node information from %s" % node)
10395 if instance.name not in node_insts.payload:
10396 if instance.admin_state == constants.ADMINST_UP:
10397 state = constants.INSTST_ERRORDOWN
10398 elif instance.admin_state == constants.ADMINST_DOWN:
10399 state = constants.INSTST_ADMINDOWN
10401 state = constants.INSTST_ADMINOFFLINE
10402 raise errors.OpExecError("Instance %s is not running (state %s)" %
10403 (instance.name, state))
10405 logging.debug("Connecting to console of %s on %s", instance.name, node)
10407 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10410 def _GetInstanceConsole(cluster, instance):
10411 """Returns console information for an instance.
10413 @type cluster: L{objects.Cluster}
10414 @type instance: L{objects.Instance}
10418 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10419 # beparams and hvparams are passed separately, to avoid editing the
10420 # instance and then saving the defaults in the instance itself.
10421 hvparams = cluster.FillHV(instance)
10422 beparams = cluster.FillBE(instance)
10423 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10425 assert console.instance == instance.name
10426 assert console.Validate()
10428 return console.ToDict()
10431 class LUInstanceReplaceDisks(LogicalUnit):
10432 """Replace the disks of an instance.
10435 HPATH = "mirrors-replace"
10436 HTYPE = constants.HTYPE_INSTANCE
10439 def CheckArguments(self):
10440 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10441 self.op.iallocator)
10443 def ExpandNames(self):
10444 self._ExpandAndLockInstance()
10446 assert locking.LEVEL_NODE not in self.needed_locks
10447 assert locking.LEVEL_NODE_RES not in self.needed_locks
10448 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10450 assert self.op.iallocator is None or self.op.remote_node is None, \
10451 "Conflicting options"
10453 if self.op.remote_node is not None:
10454 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10456 # Warning: do not remove the locking of the new secondary here
10457 # unless DRBD8.AddChildren is changed to work in parallel;
10458 # currently it doesn't since parallel invocations of
10459 # FindUnusedMinor will conflict
10460 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10461 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10463 self.needed_locks[locking.LEVEL_NODE] = []
10464 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10466 if self.op.iallocator is not None:
10467 # iallocator will select a new node in the same group
10468 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10470 self.needed_locks[locking.LEVEL_NODE_RES] = []
10472 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10473 self.op.iallocator, self.op.remote_node,
10474 self.op.disks, False, self.op.early_release,
10475 self.op.ignore_ipolicy)
10477 self.tasklets = [self.replacer]
10479 def DeclareLocks(self, level):
10480 if level == locking.LEVEL_NODEGROUP:
10481 assert self.op.remote_node is None
10482 assert self.op.iallocator is not None
10483 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10485 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10486 # Lock all groups used by instance optimistically; this requires going
10487 # via the node before it's locked, requiring verification later on
10488 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10489 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10491 elif level == locking.LEVEL_NODE:
10492 if self.op.iallocator is not None:
10493 assert self.op.remote_node is None
10494 assert not self.needed_locks[locking.LEVEL_NODE]
10496 # Lock member nodes of all locked groups
10497 self.needed_locks[locking.LEVEL_NODE] = [node_name
10498 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10499 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10501 self._LockInstancesNodes()
10502 elif level == locking.LEVEL_NODE_RES:
10504 self.needed_locks[locking.LEVEL_NODE_RES] = \
10505 self.needed_locks[locking.LEVEL_NODE]
10507 def BuildHooksEnv(self):
10508 """Build hooks env.
10510 This runs on the master, the primary and all the secondaries.
10513 instance = self.replacer.instance
10515 "MODE": self.op.mode,
10516 "NEW_SECONDARY": self.op.remote_node,
10517 "OLD_SECONDARY": instance.secondary_nodes[0],
10519 env.update(_BuildInstanceHookEnvByObject(self, instance))
10522 def BuildHooksNodes(self):
10523 """Build hooks nodes.
10526 instance = self.replacer.instance
10528 self.cfg.GetMasterNode(),
10529 instance.primary_node,
10531 if self.op.remote_node is not None:
10532 nl.append(self.op.remote_node)
10535 def CheckPrereq(self):
10536 """Check prerequisites.
10539 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10540 self.op.iallocator is None)
10542 # Verify if node group locks are still correct
10543 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10545 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10547 return LogicalUnit.CheckPrereq(self)
10550 class TLReplaceDisks(Tasklet):
10551 """Replaces disks for an instance.
10553 Note: Locking is not within the scope of this class.
10556 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10557 disks, delay_iallocator, early_release, ignore_ipolicy):
10558 """Initializes this class.
10561 Tasklet.__init__(self, lu)
10564 self.instance_name = instance_name
10566 self.iallocator_name = iallocator_name
10567 self.remote_node = remote_node
10569 self.delay_iallocator = delay_iallocator
10570 self.early_release = early_release
10571 self.ignore_ipolicy = ignore_ipolicy
10574 self.instance = None
10575 self.new_node = None
10576 self.target_node = None
10577 self.other_node = None
10578 self.remote_node_info = None
10579 self.node_secondary_ip = None
10582 def CheckArguments(mode, remote_node, iallocator):
10583 """Helper function for users of this class.
10586 # check for valid parameter combination
10587 if mode == constants.REPLACE_DISK_CHG:
10588 if remote_node is None and iallocator is None:
10589 raise errors.OpPrereqError("When changing the secondary either an"
10590 " iallocator script must be used or the"
10591 " new node given", errors.ECODE_INVAL)
10593 if remote_node is not None and iallocator is not None:
10594 raise errors.OpPrereqError("Give either the iallocator or the new"
10595 " secondary, not both", errors.ECODE_INVAL)
10597 elif remote_node is not None or iallocator is not None:
10598 # Not replacing the secondary
10599 raise errors.OpPrereqError("The iallocator and new node options can"
10600 " only be used when changing the"
10601 " secondary node", errors.ECODE_INVAL)
10604 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10605 """Compute a new secondary node using an IAllocator.
10608 ial = IAllocator(lu.cfg, lu.rpc,
10609 mode=constants.IALLOCATOR_MODE_RELOC,
10610 name=instance_name,
10611 relocate_from=list(relocate_from))
10613 ial.Run(iallocator_name)
10615 if not ial.success:
10616 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10617 " %s" % (iallocator_name, ial.info),
10618 errors.ECODE_NORES)
10620 if len(ial.result) != ial.required_nodes:
10621 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10622 " of nodes (%s), required %s" %
10624 len(ial.result), ial.required_nodes),
10625 errors.ECODE_FAULT)
10627 remote_node_name = ial.result[0]
10629 lu.LogInfo("Selected new secondary for instance '%s': %s",
10630 instance_name, remote_node_name)
10632 return remote_node_name
10634 def _FindFaultyDisks(self, node_name):
10635 """Wrapper for L{_FindFaultyInstanceDisks}.
10638 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10641 def _CheckDisksActivated(self, instance):
10642 """Checks if the instance disks are activated.
10644 @param instance: The instance to check disks
10645 @return: True if they are activated, False otherwise
10648 nodes = instance.all_nodes
10650 for idx, dev in enumerate(instance.disks):
10652 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10653 self.cfg.SetDiskID(dev, node)
10655 result = _BlockdevFind(self, node, dev, instance)
10659 elif result.fail_msg or not result.payload:
10664 def CheckPrereq(self):
10665 """Check prerequisites.
10667 This checks that the instance is in the cluster.
10670 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10671 assert instance is not None, \
10672 "Cannot retrieve locked instance %s" % self.instance_name
10674 if instance.disk_template != constants.DT_DRBD8:
10675 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10676 " instances", errors.ECODE_INVAL)
10678 if len(instance.secondary_nodes) != 1:
10679 raise errors.OpPrereqError("The instance has a strange layout,"
10680 " expected one secondary but found %d" %
10681 len(instance.secondary_nodes),
10682 errors.ECODE_FAULT)
10684 if not self.delay_iallocator:
10685 self._CheckPrereq2()
10687 def _CheckPrereq2(self):
10688 """Check prerequisites, second part.
10690 This function should always be part of CheckPrereq. It was separated and is
10691 now called from Exec because during node evacuation iallocator was only
10692 called with an unmodified cluster model, not taking planned changes into account.
10696 instance = self.instance
10697 secondary_node = instance.secondary_nodes[0]
10699 if self.iallocator_name is None:
10700 remote_node = self.remote_node
10702 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10703 instance.name, instance.secondary_nodes)
10705 if remote_node is None:
10706 self.remote_node_info = None
10708 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10709 "Remote node '%s' is not locked" % remote_node
10711 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10712 assert self.remote_node_info is not None, \
10713 "Cannot retrieve locked node %s" % remote_node
10715 if remote_node == self.instance.primary_node:
10716 raise errors.OpPrereqError("The specified node is the primary node of"
10717 " the instance", errors.ECODE_INVAL)
10719 if remote_node == secondary_node:
10720 raise errors.OpPrereqError("The specified node is already the"
10721 " secondary node of the instance",
10722 errors.ECODE_INVAL)
10724 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10725 constants.REPLACE_DISK_CHG):
10726 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10727 errors.ECODE_INVAL)
10729 if self.mode == constants.REPLACE_DISK_AUTO:
10730 if not self._CheckDisksActivated(instance):
10731 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10732 " first" % self.instance_name,
10733 errors.ECODE_STATE)
10734 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10735 faulty_secondary = self._FindFaultyDisks(secondary_node)
10737 if faulty_primary and faulty_secondary:
10738 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10739 " one node and can not be repaired"
10740 " automatically" % self.instance_name,
10741 errors.ECODE_STATE)
10744 self.disks = faulty_primary
10745 self.target_node = instance.primary_node
10746 self.other_node = secondary_node
10747 check_nodes = [self.target_node, self.other_node]
10748 elif faulty_secondary:
10749 self.disks = faulty_secondary
10750 self.target_node = secondary_node
10751 self.other_node = instance.primary_node
10752 check_nodes = [self.target_node, self.other_node]
10758 # Non-automatic modes
10759 if self.mode == constants.REPLACE_DISK_PRI:
10760 self.target_node = instance.primary_node
10761 self.other_node = secondary_node
10762 check_nodes = [self.target_node, self.other_node]
10764 elif self.mode == constants.REPLACE_DISK_SEC:
10765 self.target_node = secondary_node
10766 self.other_node = instance.primary_node
10767 check_nodes = [self.target_node, self.other_node]
10769 elif self.mode == constants.REPLACE_DISK_CHG:
10770 self.new_node = remote_node
10771 self.other_node = instance.primary_node
10772 self.target_node = secondary_node
10773 check_nodes = [self.new_node, self.other_node]
10775 _CheckNodeNotDrained(self.lu, remote_node)
10776 _CheckNodeVmCapable(self.lu, remote_node)
10778 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10779 assert old_node_info is not None
10780 if old_node_info.offline and not self.early_release:
10781 # doesn't make sense to delay the release
10782 self.early_release = True
10783 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10784 " early-release mode", secondary_node)
10787 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10790 # If not specified all disks should be replaced
10792 self.disks = range(len(self.instance.disks))
10794 # TODO: This is ugly, but right now we can't distinguish between internally
10795 # submitted opcodes and external ones. We should fix that.
10796 if self.remote_node_info:
10797 # We are changing the node; let's verify it still meets instance policy
10798 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10799 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10801 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10802 ignore=self.ignore_ipolicy)
10804 for node in check_nodes:
10805 _CheckNodeOnline(self.lu, node)
10807 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10810 if node_name is not None)
10812 # Release unneeded node and node resource locks
10813 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10814 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10816 # Release any owned node group
10817 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10818 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10820 # Check whether disks are valid
10821 for disk_idx in self.disks:
10822 instance.FindDisk(disk_idx)
10824 # Get secondary node IP addresses
10825 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10826 in self.cfg.GetMultiNodeInfo(touched_nodes))
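# Illustrative sketch of the automatic-mode decision in _CheckPrereq2, not the
# code path itself: with REPLACE_DISK_AUTO the side holding the faulty disks
# becomes the target node and the healthy side the "other" node, while faulty
# disks on both sides cannot be repaired automatically. Helper invented.
def _ExampleAutoReplaceTarget(primary, secondary, faulty_primary,
                              faulty_secondary):
  """Return (target_node, other_node), or None if there is nothing to do."""
  if faulty_primary and faulty_secondary:
    raise ValueError("faulty disks on both nodes, cannot repair automatically")
  if faulty_primary:
    return (primary, secondary)
  if faulty_secondary:
    return (secondary, primary)
  return None

# _ExampleAutoReplaceTarget("node1", "node2", [0], []) -> ("node1", "node2")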
10828 def Exec(self, feedback_fn):
10829 """Execute disk replacement.
10831 This dispatches the disk replacement to the appropriate handler.
10834 if self.delay_iallocator:
10835 self._CheckPrereq2()
10838 # Verify owned locks before starting operation
10839 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10840 assert set(owned_nodes) == set(self.node_secondary_ip), \
10841 ("Incorrect node locks, owning %s, expected %s" %
10842 (owned_nodes, self.node_secondary_ip.keys()))
10843 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10844 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10846 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10847 assert list(owned_instances) == [self.instance_name], \
10848 "Instance '%s' not locked" % self.instance_name
10850 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10851 "Should not own any node group lock at this point"
10854 feedback_fn("No disks need replacement")
10857 feedback_fn("Replacing disk(s) %s for %s" %
10858 (utils.CommaJoin(self.disks), self.instance.name))
10860 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10862 # Activate the instance disks if we're replacing them on a down instance
10864 _StartInstanceDisks(self.lu, self.instance, True)
10867 # Should we replace the secondary node?
10868 if self.new_node is not None:
10869 fn = self._ExecDrbd8Secondary
10871 fn = self._ExecDrbd8DiskOnly
10873 result = fn(feedback_fn)
10875 # Deactivate the instance disks if we're replacing them on a down instance
10878 _SafeShutdownInstanceDisks(self.lu, self.instance)
10880 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10883 # Verify owned locks
10884 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10885 nodes = frozenset(self.node_secondary_ip)
10886 assert ((self.early_release and not owned_nodes) or
10887 (not self.early_release and not (set(owned_nodes) - nodes))), \
10888 ("Not owning the correct locks, early_release=%s, owned=%r,"
10889 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10893 def _CheckVolumeGroup(self, nodes):
10894 self.lu.LogInfo("Checking volume groups")
10896 vgname = self.cfg.GetVGName()
10898 # Make sure volume group exists on all involved nodes
10899 results = self.rpc.call_vg_list(nodes)
10901 raise errors.OpExecError("Can't list volume groups on the nodes")
10904 res = results[node]
10905 res.Raise("Error checking node %s" % node)
10906 if vgname not in res.payload:
10907 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10910 def _CheckDisksExistence(self, nodes):
10911 # Check disk existence
10912 for idx, dev in enumerate(self.instance.disks):
10913 if idx not in self.disks:
10917 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10918 self.cfg.SetDiskID(dev, node)
10920 result = _BlockdevFind(self, node, dev, self.instance)
10922 msg = result.fail_msg
10923 if msg or not result.payload:
10925 msg = "disk not found"
10926 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10929 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10930 for idx, dev in enumerate(self.instance.disks):
10931 if idx not in self.disks:
10934 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10937 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10938 on_primary, ldisk=ldisk):
10939 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10940 " replace disks for instance %s" %
10941 (node_name, self.instance.name))
10943 def _CreateNewStorage(self, node_name):
10944 """Create new storage on the primary or secondary node.
10946 This is only used for same-node replaces, not for changing the
10947 secondary node, hence we don't want to modify the existing disk.
10952 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10953 for idx, dev in enumerate(disks):
10954 if idx not in self.disks:
10957 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10959 self.cfg.SetDiskID(dev, node_name)
10961 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10962 names = _GenerateUniqueNames(self.lu, lv_names)
10964 (data_disk, meta_disk) = dev.children
10965 vg_data = data_disk.logical_id[0]
10966 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10967 logical_id=(vg_data, names[0]),
10968 params=data_disk.params)
10969 vg_meta = meta_disk.logical_id[0]
10970 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10971 logical_id=(vg_meta, names[1]),
10972 params=meta_disk.params)
10974 new_lvs = [lv_data, lv_meta]
10975 old_lvs = [child.Copy() for child in dev.children]
10976 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10978 # we pass force_create=True to force the LVM creation
10979 for new_lv in new_lvs:
10980 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10981 _GetInstanceInfoText(self.instance), False)
10985 def _CheckDevices(self, node_name, iv_names):
10986 for name, (dev, _, _) in iv_names.iteritems():
10987 self.cfg.SetDiskID(dev, node_name)
10989 result = _BlockdevFind(self, node_name, dev, self.instance)
10991 msg = result.fail_msg
10992 if msg or not result.payload:
10994 msg = "disk not found"
10995 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10998 if result.payload.is_degraded:
10999 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11001 def _RemoveOldStorage(self, node_name, iv_names):
11002 for name, (_, old_lvs, _) in iv_names.iteritems():
11003 self.lu.LogInfo("Remove logical volumes for %s" % name)
11006 self.cfg.SetDiskID(lv, node_name)
11008 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11010 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11011 hint="remove unused LVs manually")
11013 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11014 """Replace a disk on the primary or secondary for DRBD 8.
11016 The algorithm for replace is quite complicated:
11018 1. for each disk to be replaced:
11020 1. create new LVs on the target node with unique names
11021 1. detach old LVs from the drbd device
11022 1. rename old LVs to name_replaced.<time_t>
11023 1. rename new LVs to old LVs
11024 1. attach the new LVs (with the old names now) to the drbd device
11026 1. wait for sync across all devices
11028 1. for each modified disk:
11030 1. remove old LVs (which have the name name_replaced.<time_t>)
11032 Failures are not very well handled.
11037 # Step: check device activation
11038 self.lu.LogStep(1, steps_total, "Check device existence")
11039 self._CheckDisksExistence([self.other_node, self.target_node])
11040 self._CheckVolumeGroup([self.target_node, self.other_node])
11042 # Step: check other node consistency
11043 self.lu.LogStep(2, steps_total, "Check peer consistency")
11044 self._CheckDisksConsistency(self.other_node,
11045 self.other_node == self.instance.primary_node,
11048 # Step: create new storage
11049 self.lu.LogStep(3, steps_total, "Allocate new storage")
11050 iv_names = self._CreateNewStorage(self.target_node)
11052 # Step: for each lv, detach+rename*2+attach
11053 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11054 for dev, old_lvs, new_lvs in iv_names.itervalues():
11055 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11057 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11059 result.Raise("Can't detach drbd from local storage on node"
11060 " %s for device %s" % (self.target_node, dev.iv_name))
11062 #cfg.Update(instance)
11064 # ok, we created the new LVs, so now we know we have the needed
11065 # storage; as such, we proceed on the target node to rename
11066 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11067 # using the assumption that logical_id == physical_id (which in
11068 # turn is the unique_id on that node)
11070 # FIXME(iustin): use a better name for the replaced LVs
11071 temp_suffix = int(time.time())
11072 ren_fn = lambda d, suff: (d.physical_id[0],
11073 d.physical_id[1] + "_replaced-%s" % suff)
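# Example with a hypothetical LV: ren_fn applied to physical_id
# ("xenvg", "abc123.disk0_data") and suffix 1400000000 yields
# ("xenvg", "abc123.disk0_data_replaced-1400000000").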
11075 # Build the rename list based on what LVs exist on the node
11076 rename_old_to_new = []
11077 for to_ren in old_lvs:
11078 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11079 if not result.fail_msg and result.payload:
11081 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11083 self.lu.LogInfo("Renaming the old LVs on the target node")
11084 result = self.rpc.call_blockdev_rename(self.target_node,
11086 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11088 # Now we rename the new LVs to the old LVs
11089 self.lu.LogInfo("Renaming the new LVs on the target node")
11090 rename_new_to_old = [(new, old.physical_id)
11091 for old, new in zip(old_lvs, new_lvs)]
11092 result = self.rpc.call_blockdev_rename(self.target_node,
11094 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11096 # Intermediate steps of in memory modifications
11097 for old, new in zip(old_lvs, new_lvs):
11098 new.logical_id = old.logical_id
11099 self.cfg.SetDiskID(new, self.target_node)
11101 # We need to modify old_lvs so that removal later removes the
11102 # right LVs, not the newly added ones; note that old_lvs is a
11104 for disk in old_lvs:
11105 disk.logical_id = ren_fn(disk, temp_suffix)
11106 self.cfg.SetDiskID(disk, self.target_node)
11108 # Now that the new lvs have the old name, we can add them to the device
11109 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11110 result = self.rpc.call_blockdev_addchildren(self.target_node,
11111 (dev, self.instance), new_lvs)
11112 msg = result.fail_msg
11114 for new_lv in new_lvs:
11115 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11118 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11119 hint=("cleanup manually the unused logical"
11121 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11123 cstep = itertools.count(5)
11125 if self.early_release:
11126 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11127 self._RemoveOldStorage(self.target_node, iv_names)
11128 # TODO: Check if releasing locks early still makes sense
11129 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11131 # Release all resource locks except those used by the instance
11132 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11133 keep=self.node_secondary_ip.keys())
11135 # Release all node locks while waiting for sync
11136 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11138 # TODO: Can the instance lock be downgraded here? Take the optional disk
11139 # shutdown in the caller into consideration.
11142 # This can fail as the old devices are degraded and _WaitForSync
11143 # returns a combined result over all disks, so we don't check its return value
11144 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11145 _WaitForSync(self.lu, self.instance)
11147 # Check all devices manually
11148 self._CheckDevices(self.instance.primary_node, iv_names)
11150 # Step: remove old storage
11151 if not self.early_release:
11152 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11153 self._RemoveOldStorage(self.target_node, iv_names)
11155 def _ExecDrbd8Secondary(self, feedback_fn):
11156 """Replace the secondary node for DRBD 8.
11158 The algorithm for replace is quite complicated:
11159 - for all disks of the instance:
11160 - create new LVs on the new node with same names
11161 - shutdown the drbd device on the old secondary
11162 - disconnect the drbd network on the primary
11163 - create the drbd device on the new secondary
11164 - network attach the drbd on the primary, using an artifice:
11165 the drbd code for Attach() will connect to the network if it
11166 finds a device which is connected to the good local disks but
11167 not network enabled
11168 - wait for sync across all devices
11169 - remove all disks from the old secondary
11171 Failures are not very well handled.
11176 pnode = self.instance.primary_node
11178 # Step: check device activation
11179 self.lu.LogStep(1, steps_total, "Check device existence")
11180 self._CheckDisksExistence([self.instance.primary_node])
11181 self._CheckVolumeGroup([self.instance.primary_node])
11183 # Step: check other node consistency
11184 self.lu.LogStep(2, steps_total, "Check peer consistency")
11185 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11187 # Step: create new storage
11188 self.lu.LogStep(3, steps_total, "Allocate new storage")
11189 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11190 for idx, dev in enumerate(disks):
11191 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11192 (self.new_node, idx))
11193 # we pass force_create=True to force LVM creation
11194 for new_lv in dev.children:
11195 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11196 True, _GetInstanceInfoText(self.instance), False)
11198 # Step 4: drbd minors and drbd setup changes
11199 # after this, we must manually remove the drbd minors on both the
11200 # error and the success paths
11201 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11202 minors = self.cfg.AllocateDRBDMinor([self.new_node
11203 for dev in self.instance.disks],
11204 self.instance.name)
11205 logging.debug("Allocated minors %r", minors)
11208 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11209 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11210 (self.new_node, idx))
11211 # create new devices on new_node; note that we create two IDs:
11212 # one without port, so the drbd will be activated without
11213 # networking information on the new node at this stage, and one
11214 # with network, for the latter activation in step 4
11215 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11216 if self.instance.primary_node == o_node1:
11219 assert self.instance.primary_node == o_node2, "Three-node instance?"
11222 new_alone_id = (self.instance.primary_node, self.new_node, None,
11223 p_minor, new_minor, o_secret)
11224 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11225 p_minor, new_minor, o_secret)
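# DRBD8 logical_id layout assumed here:
#   (node_a, node_b, port, minor_a, minor_b, secret)
# new_alone_id carries None instead of the port so the device is brought up
# without networking; new_net_id keeps the port for the later attach step.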
11227 iv_names[idx] = (dev, dev.children, new_net_id)
11228 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11230 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11231 logical_id=new_alone_id,
11232 children=dev.children,
11235 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11238 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11240 _GetInstanceInfoText(self.instance), False)
11241 except errors.GenericError:
11242 self.cfg.ReleaseDRBDMinors(self.instance.name)
11245 # We have new devices, shutdown the drbd on the old secondary
11246 for idx, dev in enumerate(self.instance.disks):
11247 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11248 self.cfg.SetDiskID(dev, self.target_node)
11249 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11250 (dev, self.instance)).fail_msg
11252 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11253 "node: %s" % (idx, msg),
11254 hint=("Please cleanup this device manually as"
11255 " soon as possible"))
11257 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11258 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11259 self.instance.disks)[pnode]
11261 msg = result.fail_msg
11263 # detaches didn't succeed (unlikely)
11264 self.cfg.ReleaseDRBDMinors(self.instance.name)
11265 raise errors.OpExecError("Can't detach the disks from the network on"
11266 " old node: %s" % (msg,))
11268 # if we managed to detach at least one, we update all the disks of
11269 # the instance to point to the new secondary
11270 self.lu.LogInfo("Updating instance configuration")
11271 for dev, _, new_logical_id in iv_names.itervalues():
11272 dev.logical_id = new_logical_id
11273 self.cfg.SetDiskID(dev, self.instance.primary_node)
11275 self.cfg.Update(self.instance, feedback_fn)
11277 # Release all node locks (the configuration has been updated)
11278 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11280 # and now perform the drbd attach
11281 self.lu.LogInfo("Attaching primary drbds to new secondary"
11282 " (standalone => connected)")
11283 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11285 self.node_secondary_ip,
11286 (self.instance.disks, self.instance),
11287 self.instance.name,
11289 for to_node, to_result in result.items():
11290 msg = to_result.fail_msg
11292 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11294 hint=("please do a gnt-instance info to see the"
11295 " status of disks"))
11297 cstep = itertools.count(5)
11299 if self.early_release:
11300 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11301 self._RemoveOldStorage(self.target_node, iv_names)
11302 # TODO: Check if releasing locks early still makes sense
11303 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11305 # Release all resource locks except those used by the instance
11306 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11307 keep=self.node_secondary_ip.keys())
11309 # TODO: Can the instance lock be downgraded here? Take the optional disk
11310 # shutdown in the caller into consideration.
11313 # This can fail as the old devices are degraded and _WaitForSync
11314 # returns a combined result over all disks, so we don't check its return value
11315 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11316 _WaitForSync(self.lu, self.instance)
11318 # Check all devices manually
11319 self._CheckDevices(self.instance.primary_node, iv_names)
11321 # Step: remove old storage
11322 if not self.early_release:
11323 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11324 self._RemoveOldStorage(self.target_node, iv_names)
11327 class LURepairNodeStorage(NoHooksLU):
11328 """Repairs the volume group on a node.
11333 def CheckArguments(self):
11334 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11336 storage_type = self.op.storage_type
11338 if (constants.SO_FIX_CONSISTENCY not in
11339 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11340 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11341 " repaired" % storage_type,
11342 errors.ECODE_INVAL)
11344 def ExpandNames(self):
11345 self.needed_locks = {
11346 locking.LEVEL_NODE: [self.op.node_name],
11349 def _CheckFaultyDisks(self, instance, node_name):
11350 """Ensure faulty disks abort the opcode or at least warn."""
11352 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11354 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11355 " node '%s'" % (instance.name, node_name),
11356 errors.ECODE_STATE)
11357 except errors.OpPrereqError, err:
11358 if self.op.ignore_consistency:
11359 self.proc.LogWarning(str(err.args[0]))
11363 def CheckPrereq(self):
11364 """Check prerequisites.
11367 # Check whether any instance on this node has faulty disks
11368 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11369 if inst.admin_state != constants.ADMINST_UP:
11371 check_nodes = set(inst.all_nodes)
11372 check_nodes.discard(self.op.node_name)
11373 for inst_node_name in check_nodes:
11374 self._CheckFaultyDisks(inst, inst_node_name)
11376 def Exec(self, feedback_fn):
11377 feedback_fn("Repairing storage unit '%s' on %s ..." %
11378 (self.op.name, self.op.node_name))
11380 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11381 result = self.rpc.call_storage_execute(self.op.node_name,
11382 self.op.storage_type, st_args,
11384 constants.SO_FIX_CONSISTENCY)
11385 result.Raise("Failed to repair storage unit '%s' on %s" %
11386 (self.op.name, self.op.node_name))
11389 class LUNodeEvacuate(NoHooksLU):
11390 """Evacuates instances off a list of nodes.
11395 _MODE2IALLOCATOR = {
11396 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11397 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11398 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11400 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11401 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11402 constants.IALLOCATOR_NEVAC_MODES)
11404 def CheckArguments(self):
11405 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11407 def ExpandNames(self):
11408 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11410 if self.op.remote_node is not None:
11411 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11412 assert self.op.remote_node
11414 if self.op.remote_node == self.op.node_name:
11415 raise errors.OpPrereqError("Can not use evacuated node as a new"
11416 " secondary node", errors.ECODE_INVAL)
11418 if self.op.mode != constants.NODE_EVAC_SEC:
11419 raise errors.OpPrereqError("Without the use of an iallocator only"
11420 " secondary instances can be evacuated",
11421 errors.ECODE_INVAL)
11424 self.share_locks = _ShareAll()
11425 self.needed_locks = {
11426 locking.LEVEL_INSTANCE: [],
11427 locking.LEVEL_NODEGROUP: [],
11428 locking.LEVEL_NODE: [],
11431 # Determine nodes (via group) optimistically, needs verification once locks
11432 # have been acquired
11433 self.lock_nodes = self._DetermineNodes()
11435 def _DetermineNodes(self):
11436 """Gets the list of nodes to operate on.
11439 if self.op.remote_node is None:
11440 # Iallocator will choose any node(s) in the same group
11441 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11443 group_nodes = frozenset([self.op.remote_node])
11445 # Determine nodes to be locked
11446 return set([self.op.node_name]) | group_nodes
11448 def _DetermineInstances(self):
11449 """Builds list of instances to operate on.
11452 assert self.op.mode in constants.NODE_EVAC_MODES
11454 if self.op.mode == constants.NODE_EVAC_PRI:
11455 # Primary instances only
11456 inst_fn = _GetNodePrimaryInstances
11457 assert self.op.remote_node is None, \
11458 "Evacuating primary instances requires iallocator"
11459 elif self.op.mode == constants.NODE_EVAC_SEC:
11460 # Secondary instances only
11461 inst_fn = _GetNodeSecondaryInstances
11464 assert self.op.mode == constants.NODE_EVAC_ALL
11465 inst_fn = _GetNodeInstances
11466 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11468 raise errors.OpPrereqError("Due to an issue with the iallocator"
11469 " interface it is not possible to evacuate"
11470 " all instances at once; specify explicitly"
11471 " whether to evacuate primary or secondary"
11473 errors.ECODE_INVAL)
11475 return inst_fn(self.cfg, self.op.node_name)
11477 def DeclareLocks(self, level):
11478 if level == locking.LEVEL_INSTANCE:
11479 # Lock instances optimistically, needs verification once node and group
11480 # locks have been acquired
11481 self.needed_locks[locking.LEVEL_INSTANCE] = \
11482 set(i.name for i in self._DetermineInstances())
11484 elif level == locking.LEVEL_NODEGROUP:
11485 # Lock node groups for all potential target nodes optimistically, needs
11486 # verification once nodes have been acquired
11487 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11488 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11490 elif level == locking.LEVEL_NODE:
11491 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11493 def CheckPrereq(self):
11495 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11496 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11497 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11499 need_nodes = self._DetermineNodes()
11501 if not owned_nodes.issuperset(need_nodes):
11502 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11503 " locks were acquired, current nodes are"
11504 " are '%s', used to be '%s'; retry the"
11506 (self.op.node_name,
11507 utils.CommaJoin(need_nodes),
11508 utils.CommaJoin(owned_nodes)),
11509 errors.ECODE_STATE)
11511 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11512 if owned_groups != wanted_groups:
11513 raise errors.OpExecError("Node groups changed since locks were acquired,"
11514 " current groups are '%s', used to be '%s';"
11515 " retry the operation" %
11516 (utils.CommaJoin(wanted_groups),
11517 utils.CommaJoin(owned_groups)))
11519 # Determine affected instances
11520 self.instances = self._DetermineInstances()
11521 self.instance_names = [i.name for i in self.instances]
11523 if set(self.instance_names) != owned_instances:
11524 raise errors.OpExecError("Instances on node '%s' changed since locks"
11525 " were acquired, current instances are '%s',"
11526 " used to be '%s'; retry the operation" %
11527 (self.op.node_name,
11528 utils.CommaJoin(self.instance_names),
11529 utils.CommaJoin(owned_instances)))
11531 if self.instance_names:
11532 self.LogInfo("Evacuating instances from node '%s': %s",
11534 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11536 self.LogInfo("No instances to evacuate from node '%s'",
11539 if self.op.remote_node is not None:
11540 for i in self.instances:
11541 if i.primary_node == self.op.remote_node:
11542 raise errors.OpPrereqError("Node %s is the primary node of"
11543 " instance %s, cannot use it as"
11545 (self.op.remote_node, i.name),
11546 errors.ECODE_INVAL)
11548 def Exec(self, feedback_fn):
11549 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11551 if not self.instance_names:
11552 # No instances to evacuate
11555 elif self.op.iallocator is not None:
11556 # TODO: Implement relocation to other group
11557 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11558 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11559 instances=list(self.instance_names))
11561 ial.Run(self.op.iallocator)
11563 if not ial.success:
11564 raise errors.OpPrereqError("Can't compute node evacuation using"
11565 " iallocator '%s': %s" %
11566 (self.op.iallocator, ial.info),
11567 errors.ECODE_NORES)
11569 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11571 elif self.op.remote_node is not None:
11572 assert self.op.mode == constants.NODE_EVAC_SEC
11574 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11575 remote_node=self.op.remote_node,
11577 mode=constants.REPLACE_DISK_CHG,
11578 early_release=self.op.early_release)]
11579 for instance_name in self.instance_names
11583 raise errors.ProgrammerError("No iallocator or remote node")
11585 return ResultWithJobs(jobs)
11588 def _SetOpEarlyRelease(early_release, op):
11589 """Sets C{early_release} flag on opcodes if available.
11593 op.early_release = early_release
11594 except AttributeError:
11595 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11600 def _NodeEvacDest(use_nodes, group, nodes):
11601 """Returns group or nodes depending on caller's choice.
11605 return utils.CommaJoin(nodes)
11610 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11611 """Unpacks the result of change-group and node-evacuate iallocator requests.
11613 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11614 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11616 @type lu: L{LogicalUnit}
11617 @param lu: Logical unit instance
11618 @type alloc_result: tuple/list
11619 @param alloc_result: Result from iallocator
11620 @type early_release: bool
11621 @param early_release: Whether to release locks early if possible
11622 @type use_nodes: bool
11623 @param use_nodes: Whether to display node names instead of groups
11626 (moved, failed, jobs) = alloc_result
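# Illustrative shape of alloc_result (values are hypothetical):
#   moved  = [("inst1.example.com", "group1", ["node2.example.com"]), ...]
#   failed = [("inst3.example.com", "insufficient memory"), ...]
#   jobs   = [[<serialized opcode>, ...], ...]   # one inner list per job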
11629 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11630 for (name, reason) in failed)
11631 lu.LogWarning("Unable to evacuate instances %s", failreason)
11632 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11635 lu.LogInfo("Instances to be moved: %s",
11636 utils.CommaJoin("%s (to %s)" %
11637 (name, _NodeEvacDest(use_nodes, group, nodes))
11638 for (name, group, nodes) in moved))
11640 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11641 map(opcodes.OpCode.LoadOpCode, ops))
11645 class LUInstanceGrowDisk(LogicalUnit):
11646 """Grow a disk of an instance.
11649 HPATH = "disk-grow"
11650 HTYPE = constants.HTYPE_INSTANCE
11653 def ExpandNames(self):
11654 self._ExpandAndLockInstance()
11655 self.needed_locks[locking.LEVEL_NODE] = []
11656 self.needed_locks[locking.LEVEL_NODE_RES] = []
11657 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11658 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11660 def DeclareLocks(self, level):
11661 if level == locking.LEVEL_NODE:
11662 self._LockInstancesNodes()
11663 elif level == locking.LEVEL_NODE_RES:
11665 self.needed_locks[locking.LEVEL_NODE_RES] = \
11666 self.needed_locks[locking.LEVEL_NODE][:]
11668 def BuildHooksEnv(self):
11669 """Build hooks env.
11671 This runs on the master, the primary and all the secondaries.
11675 "DISK": self.op.disk,
11676 "AMOUNT": self.op.amount,
11677 "ABSOLUTE": self.op.absolute,
11679 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11682 def BuildHooksNodes(self):
11683 """Build hooks nodes.
11686 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11689 def CheckPrereq(self):
11690 """Check prerequisites.
11692 This checks that the instance is in the cluster.
11695 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11696 assert instance is not None, \
11697 "Cannot retrieve locked instance %s" % self.op.instance_name
11698 nodenames = list(instance.all_nodes)
11699 for node in nodenames:
11700 _CheckNodeOnline(self, node)
11702 self.instance = instance
11704 if instance.disk_template not in constants.DTS_GROWABLE:
11705 raise errors.OpPrereqError("Instance's disk layout does not support"
11706 " growing", errors.ECODE_INVAL)
11708 self.disk = instance.FindDisk(self.op.disk)
11710 if self.op.absolute:
11711 self.target = self.op.amount
11712 self.delta = self.target - self.disk.size
11714 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11715 "current disk size (%s)" %
11716 (utils.FormatUnit(self.target, "h"),
11717 utils.FormatUnit(self.disk.size, "h")),
11718 errors.ECODE_STATE)
11720 self.delta = self.op.amount
11721 self.target = self.disk.size + self.delta
11723 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11724 utils.FormatUnit(self.delta, "h"),
11725 errors.ECODE_INVAL)
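# Worked example (hypothetical sizes): growing a 1024 MB disk with
# absolute=False and amount=512 gives delta=512, target=1536; with
# absolute=True and amount=2048 it gives delta=1024, target=2048.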
11727 if instance.disk_template not in (constants.DT_FILE,
11728 constants.DT_SHARED_FILE,
11730 # TODO: check the free disk space for file, when that feature will be
11732 _CheckNodesFreeDiskPerVG(self, nodenames,
11733 self.disk.ComputeGrowth(self.delta))
11735 def Exec(self, feedback_fn):
11736 """Execute disk grow.
11739 instance = self.instance
11742 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11743 assert (self.owned_locks(locking.LEVEL_NODE) ==
11744 self.owned_locks(locking.LEVEL_NODE_RES))
11746 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11748 raise errors.OpExecError("Cannot activate block device to grow")
11750 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11751 (self.op.disk, instance.name,
11752 utils.FormatUnit(self.delta, "h"),
11753 utils.FormatUnit(self.target, "h")))
11755 # First run all grow ops in dry-run mode
11756 for node in instance.all_nodes:
11757 self.cfg.SetDiskID(disk, node)
11758 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11760 result.Raise("Grow request failed to node %s" % node)
11762 # We know that (as far as we can test) operations across different
11763 # nodes will succeed, time to run it for real on the backing storage
11764 for node in instance.all_nodes:
11765 self.cfg.SetDiskID(disk, node)
11766 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11768 result.Raise("Grow request failed to node %s" % node)
11770 # And now execute it for logical storage, on the primary node
11771 node = instance.primary_node
11772 self.cfg.SetDiskID(disk, node)
11773 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11775 result.Raise("Grow request failed to node %s" % node)
11777 disk.RecordGrow(self.delta)
11778 self.cfg.Update(instance, feedback_fn)
11780 # Changes have been recorded, release node lock
11781 _ReleaseLocks(self, locking.LEVEL_NODE)
11783 # Downgrade lock while waiting for sync
11784 self.glm.downgrade(locking.LEVEL_INSTANCE)
11786 if self.op.wait_for_sync:
11787 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11789 self.proc.LogWarning("Disk sync-ing has not returned a good"
11790 " status; please check the instance")
11791 if instance.admin_state != constants.ADMINST_UP:
11792 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11793 elif instance.admin_state != constants.ADMINST_UP:
11794 self.proc.LogWarning("Not shutting down the disk even though the"
11795 " instance is not supposed to be running, because"
11796 " no wait-for-sync mode was requested")
11798 assert self.owned_locks(locking.LEVEL_NODE_RES)
11799 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11802 class LUInstanceQueryData(NoHooksLU):
11803 """Query runtime instance data.
11808 def ExpandNames(self):
11809 self.needed_locks = {}
11811 # Use locking if requested or when non-static information is wanted
11812 if not (self.op.static or self.op.use_locking):
11813 self.LogWarning("Non-static data requested, locks need to be acquired")
11814 self.op.use_locking = True
11816 if self.op.instances or not self.op.use_locking:
11817 # Expand instance names right here
11818 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11820 # Will use acquired locks
11821 self.wanted_names = None
11823 if self.op.use_locking:
11824 self.share_locks = _ShareAll()
11826 if self.wanted_names is None:
11827 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11829 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11831 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11832 self.needed_locks[locking.LEVEL_NODE] = []
11833 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11835 def DeclareLocks(self, level):
11836 if self.op.use_locking:
11837 if level == locking.LEVEL_NODEGROUP:
11838 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11840 # Lock all groups used by instances optimistically; this requires going
11841 # via the node before it's locked, requiring verification later on
11842 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11843 frozenset(group_uuid
11844 for instance_name in owned_instances
11846 self.cfg.GetInstanceNodeGroups(instance_name))
11848 elif level == locking.LEVEL_NODE:
11849 self._LockInstancesNodes()
11851 def CheckPrereq(self):
11852 """Check prerequisites.
11854 This only checks the optional instance list against the existing names.
11857 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11858 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11859 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11861 if self.wanted_names is None:
11862 assert self.op.use_locking, "Locking was not used"
11863 self.wanted_names = owned_instances
11865 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11867 if self.op.use_locking:
11868 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11871 assert not (owned_instances or owned_groups or owned_nodes)
11873 self.wanted_instances = instances.values()
11875 def _ComputeBlockdevStatus(self, node, instance, dev):
11876 """Returns the status of a block device
11879 if self.op.static or not node:
11882 self.cfg.SetDiskID(dev, node)
11884 result = self.rpc.call_blockdev_find(node, dev)
11888 result.Raise("Can't compute disk status for %s" % instance.name)
11890 status = result.payload
11894 return (status.dev_path, status.major, status.minor,
11895 status.sync_percent, status.estimated_time,
11896 status.is_degraded, status.ldisk_status)
11898 def _ComputeDiskStatus(self, instance, snode, dev):
11899 """Compute block device status.
11902 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11904 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11906 def _ComputeDiskStatusInner(self, instance, snode, dev):
11907 """Compute block device status.
11909 @attention: The device has to be annotated already.
11912 if dev.dev_type in constants.LDS_DRBD:
11913 # we change the snode then (otherwise we use the one passed in)
11914 if dev.logical_id[0] == instance.primary_node:
11915 snode = dev.logical_id[1]
11917 snode = dev.logical_id[0]
11919 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11921 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11924 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11931 "iv_name": dev.iv_name,
11932 "dev_type": dev.dev_type,
11933 "logical_id": dev.logical_id,
11934 "physical_id": dev.physical_id,
11935 "pstatus": dev_pstatus,
11936 "sstatus": dev_sstatus,
11937 "children": dev_children,
11942 def Exec(self, feedback_fn):
11943 """Gather and return data"""
11946 cluster = self.cfg.GetClusterInfo()
11948 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11949 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11951 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11952 for node in nodes.values()))
11954 group2name_fn = lambda uuid: groups[uuid].name
11956 for instance in self.wanted_instances:
11957 pnode = nodes[instance.primary_node]
11959 if self.op.static or pnode.offline:
11960 remote_state = None
11962 self.LogWarning("Primary node %s is marked offline, returning static"
11963 " information only for instance %s" %
11964 (pnode.name, instance.name))
11966 remote_info = self.rpc.call_instance_info(instance.primary_node,
11968 instance.hypervisor)
11969 remote_info.Raise("Error checking node %s" % instance.primary_node)
11970 remote_info = remote_info.payload
11971 if remote_info and "state" in remote_info:
11972 remote_state = "up"
11974 if instance.admin_state == constants.ADMINST_UP:
11975 remote_state = "down"
11977 remote_state = instance.admin_state
11979 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11982 snodes_group_uuids = [nodes[snode_name].group
11983 for snode_name in instance.secondary_nodes]
11985 result[instance.name] = {
11986 "name": instance.name,
11987 "config_state": instance.admin_state,
11988 "run_state": remote_state,
11989 "pnode": instance.primary_node,
11990 "pnode_group_uuid": pnode.group,
11991 "pnode_group_name": group2name_fn(pnode.group),
11992 "snodes": instance.secondary_nodes,
11993 "snodes_group_uuids": snodes_group_uuids,
11994 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11996 # this happens to be the same format used for hooks
11997 "nics": _NICListToTuple(self, instance.nics),
11998 "disk_template": instance.disk_template,
12000 "hypervisor": instance.hypervisor,
12001 "network_port": instance.network_port,
12002 "hv_instance": instance.hvparams,
12003 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12004 "be_instance": instance.beparams,
12005 "be_actual": cluster.FillBE(instance),
12006 "os_instance": instance.osparams,
12007 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12008 "serial_no": instance.serial_no,
12009 "mtime": instance.mtime,
12010 "ctime": instance.ctime,
12011 "uuid": instance.uuid,
12017 def PrepareContainerMods(mods, private_fn):
12018 """Prepares a list of container modifications by adding a private data field.
12020 @type mods: list of tuples; (operation, index, parameters)
12021 @param mods: List of modifications
12022 @type private_fn: callable or None
12023 @param private_fn: Callable for constructing a private data field for a
12028 if private_fn is None:
12033 return [(op, idx, params, fn()) for (op, idx, params) in mods]
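# Illustrative call (hypothetical parameters):
#   PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 1024})], None)
# returns [(constants.DDM_ADD, -1, {"size": 1024}, None)]; with a private_fn
# such as _InstNicModPrivate each tuple instead carries a fresh private object.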
12036 #: Type description for changes as returned by L{ApplyContainerMods}'s
12038 _TApplyContModsCbChanges = \
12039 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12040 ht.TNonEmptyString,
12045 def ApplyContainerMods(kind, container, chgdesc, mods,
12046 create_fn, modify_fn, remove_fn):
12047 """Applies descriptions in C{mods} to C{container}.
12050 @param kind: One-word item description
12051 @type container: list
12052 @param container: Container to modify
12053 @type chgdesc: None or list
12054 @param chgdesc: List of applied changes
12056 @param mods: Modifications as returned by L{PrepareContainerMods}
12057 @type create_fn: callable
12058 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12059 receives absolute item index, parameters and private data object as added
12060 by L{PrepareContainerMods}, returns tuple containing new item and changes
12062 @type modify_fn: callable
12063 @param modify_fn: Callback for modifying an existing item
12064 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12065 and private data object as added by L{PrepareContainerMods}, returns
12067 @type remove_fn: callable
12068 @param remove_fn: Callback on removing item; receives absolute item index,
12069 item and private data object as added by L{PrepareContainerMods}
12072 for (op, idx, params, private) in mods:
12075 absidx = len(container) - 1
12077 raise IndexError("Not accepting negative indices other than -1")
12078 elif idx > len(container):
12079 raise IndexError("Got %s index %s, but there are only %s" %
12080 (kind, idx, len(container)))
12086 if op == constants.DDM_ADD:
12087 # Calculate where item will be added
12089 addidx = len(container)
12093 if create_fn is None:
12096 (item, changes) = create_fn(addidx, params, private)
12099 container.append(item)
12102 assert idx <= len(container)
12103 # list.insert does so before the specified index
12104 container.insert(idx, item)
12106 # Retrieve existing item
12108 item = container[absidx]
12110 raise IndexError("Invalid %s index %s" % (kind, idx))
12112 if op == constants.DDM_REMOVE:
12115 if remove_fn is not None:
12116 remove_fn(absidx, item, private)
12118 changes = [("%s/%s" % (kind, absidx), "remove")]
12120 assert container[absidx] == item
12121 del container[absidx]
12122 elif op == constants.DDM_MODIFY:
12123 if modify_fn is not None:
12124 changes = modify_fn(absidx, item, params, private)
12126 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12128 assert _TApplyContModsCbChanges(changes)
12130 if not (chgdesc is None or changes is None):
12131 chgdesc.extend(changes)
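# Index semantics, illustrated on a hypothetical two-item container:
#   (DDM_ADD, -1, params, private)    appends after the last item
#   (DDM_ADD, 0, params, private)     inserts before the first item
#   (DDM_REMOVE, -1, params, private) drops the last item and records
#                                     ("disk/1", "remove") in chgdesc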
12134 def _UpdateIvNames(base_index, disks):
12135 """Updates the C{iv_name} attribute of disks.
12137 @type disks: list of L{objects.Disk}
12140 for (idx, disk) in enumerate(disks):
12141 disk.iv_name = "disk/%s" % (base_index + idx, )
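# For instance, _UpdateIvNames(2, [d1, d2]) relabels them "disk/2" and "disk/3".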
12144 class _InstNicModPrivate:
12145 """Data structure for network interface modifications.
12147 Used by L{LUInstanceSetParams}.
12150 def __init__(self):
12155 class LUInstanceSetParams(LogicalUnit):
12156 """Modifies an instances's parameters.
12159 HPATH = "instance-modify"
12160 HTYPE = constants.HTYPE_INSTANCE
12164 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12165 assert ht.TList(mods)
12166 assert not mods or len(mods[0]) in (2, 3)
12168 if mods and len(mods[0]) == 2:
12172 for op, params in mods:
12173 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12174 result.append((op, -1, params))
12178 raise errors.OpPrereqError("Only one %s add or remove operation is"
12179 " supported at a time" % kind,
12180 errors.ECODE_INVAL)
12182 result.append((constants.DDM_MODIFY, op, params))
12184 assert verify_fn(result)
12191 def _CheckMods(kind, mods, key_types, item_fn):
12192 """Ensures requested disk/NIC modifications are valid.
12195 for (op, _, params) in mods:
12196 assert ht.TDict(params)
12198 utils.ForceDictType(params, key_types)
12200 if op == constants.DDM_REMOVE:
12202 raise errors.OpPrereqError("No settings should be passed when"
12203 " removing a %s" % kind,
12204 errors.ECODE_INVAL)
12205 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12206 item_fn(op, params)
12208 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12211 def _VerifyDiskModification(op, params):
12212 """Verifies a disk modification.
12215 if op == constants.DDM_ADD:
12216 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12217 if mode not in constants.DISK_ACCESS_SET:
12218 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12219 errors.ECODE_INVAL)
12221 size = params.get(constants.IDISK_SIZE, None)
12223 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12224 constants.IDISK_SIZE, errors.ECODE_INVAL)
12228 except (TypeError, ValueError), err:
12229 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12230 errors.ECODE_INVAL)
12232 params[constants.IDISK_SIZE] = size
12234 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12235 raise errors.OpPrereqError("Disk size change not possible, use"
12236 " grow-disk", errors.ECODE_INVAL)
12239 def _VerifyNicModification(op, params):
12240 """Verifies a network interface modification.
12243 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12244 ip = params.get(constants.INIC_IP, None)
12247 elif ip.lower() == constants.VALUE_NONE:
12248 params[constants.INIC_IP] = None
12249 elif not netutils.IPAddress.IsValid(ip):
12250 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12251 errors.ECODE_INVAL)
12253 bridge = params.get("bridge", None)
12254 link = params.get(constants.INIC_LINK, None)
12255 if bridge and link:
12256 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12257 " at the same time", errors.ECODE_INVAL)
12258 elif bridge and bridge.lower() == constants.VALUE_NONE:
12259 params["bridge"] = None
12260 elif link and link.lower() == constants.VALUE_NONE:
12261 params[constants.INIC_LINK] = None
12263 if op == constants.DDM_ADD:
12264 macaddr = params.get(constants.INIC_MAC, None)
12265 if macaddr is None:
12266 params[constants.INIC_MAC] = constants.VALUE_AUTO
12268 if constants.INIC_MAC in params:
12269 macaddr = params[constants.INIC_MAC]
12270 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12271 macaddr = utils.NormalizeAndValidateMac(macaddr)
12273 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12274 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12275 " modifying an existing NIC",
12276 errors.ECODE_INVAL)
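# Sketch of the resulting normalisation (hypothetical input): adding a NIC with
# {"ip": "none", "bridge": "none"} ends up with INIC_IP=None, "bridge"=None and
# INIC_MAC defaulted to constants.VALUE_AUTO.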
12278 def CheckArguments(self):
12279 if not (self.op.nics or self.op.disks or self.op.disk_template or
12280 self.op.hvparams or self.op.beparams or self.op.os_name or
12281 self.op.offline is not None or self.op.runtime_mem):
12282 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12284 if self.op.hvparams:
12285 _CheckGlobalHvParams(self.op.hvparams)
12288 self._UpgradeDiskNicMods("disk", self.op.disks,
12289 opcodes.OpInstanceSetParams.TestDiskModifications)
12291 self._UpgradeDiskNicMods("NIC", self.op.nics,
12292 opcodes.OpInstanceSetParams.TestNicModifications)
12294 # Check disk modifications
12295 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12296 self._VerifyDiskModification)
12298 if self.op.disks and self.op.disk_template is not None:
12299 raise errors.OpPrereqError("Disk template conversion and other disk"
12300 " changes not supported at the same time",
12301 errors.ECODE_INVAL)
12303 if (self.op.disk_template and
12304 self.op.disk_template in constants.DTS_INT_MIRROR and
12305 self.op.remote_node is None):
12306 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12307 " one requires specifying a secondary node",
12308 errors.ECODE_INVAL)
12310 # Check NIC modifications
12311 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12312 self._VerifyNicModification)
12314 def ExpandNames(self):
12315 self._ExpandAndLockInstance()
12316 # Can't even acquire node locks in shared mode as upcoming changes in
12317 # Ganeti 2.6 will start to modify the node object on disk conversion
12318 self.needed_locks[locking.LEVEL_NODE] = []
12319 self.needed_locks[locking.LEVEL_NODE_RES] = []
12320 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12322 def DeclareLocks(self, level):
12323 # TODO: Acquire group lock in shared mode (disk parameters)
12324 if level == locking.LEVEL_NODE:
12325 self._LockInstancesNodes()
12326 if self.op.disk_template and self.op.remote_node:
12327 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12328 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12329 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12331 self.needed_locks[locking.LEVEL_NODE_RES] = \
12332 self.needed_locks[locking.LEVEL_NODE][:]
12334 def BuildHooksEnv(self):
12335 """Build hooks env.
12337 This runs on the master, primary and secondaries.
12341 if constants.BE_MINMEM in self.be_new:
12342 args["minmem"] = self.be_new[constants.BE_MINMEM]
12343 if constants.BE_MAXMEM in self.be_new:
12344 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12345 if constants.BE_VCPUS in self.be_new:
12346 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12347 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12348 # information at all.
12350 if self._new_nics is not None:
12353 for nic in self._new_nics:
12354 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12355 mode = nicparams[constants.NIC_MODE]
12356 link = nicparams[constants.NIC_LINK]
12357 nics.append((nic.ip, nic.mac, mode, link))
12359 args["nics"] = nics
12361 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12362 if self.op.disk_template:
12363 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12364 if self.op.runtime_mem:
12365 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12369 def BuildHooksNodes(self):
12370 """Build hooks nodes.
12373 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12376 def _PrepareNicModification(self, params, private, old_ip, old_params,
12378 update_params_dict = dict([(key, params[key])
12379 for key in constants.NICS_PARAMETERS
12382 if "bridge" in params:
12383 update_params_dict[constants.NIC_LINK] = params["bridge"]
12385 new_params = _GetUpdatedParams(old_params, update_params_dict)
12386 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12388 new_filled_params = cluster.SimpleFillNIC(new_params)
12389 objects.NIC.CheckParameterSyntax(new_filled_params)
12391 new_mode = new_filled_params[constants.NIC_MODE]
12392 if new_mode == constants.NIC_MODE_BRIDGED:
12393 bridge = new_filled_params[constants.NIC_LINK]
12394 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12396 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12398 self.warn.append(msg)
12400 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12402 elif new_mode == constants.NIC_MODE_ROUTED:
12403 ip = params.get(constants.INIC_IP, old_ip)
12405 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12406 " on a routed NIC", errors.ECODE_INVAL)
12408 if constants.INIC_MAC in params:
12409 mac = params[constants.INIC_MAC]
12411 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12412 errors.ECODE_INVAL)
12413 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12414 # otherwise generate the MAC address
12415 params[constants.INIC_MAC] = \
12416 self.cfg.GenerateMAC(self.proc.GetECId())
12418 # or validate/reserve the current one
12420 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12421 except errors.ReservationError:
12422 raise errors.OpPrereqError("MAC address '%s' already in use"
12423 " in cluster" % mac,
12424 errors.ECODE_NOTUNIQUE)
12426 private.params = new_params
12427 private.filled = new_filled_params
12429 def CheckPrereq(self):
12430 """Check prerequisites.
12432 This only checks the instance list against the existing names.
12435 # checking the new params on the primary/secondary nodes
12437 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12438 cluster = self.cluster = self.cfg.GetClusterInfo()
12439 assert self.instance is not None, \
12440 "Cannot retrieve locked instance %s" % self.op.instance_name
12441 pnode = instance.primary_node
12442 nodelist = list(instance.all_nodes)
12443 pnode_info = self.cfg.GetNodeInfo(pnode)
12444 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12446 # Prepare disk/NIC modifications
12447 self.diskmod = PrepareContainerMods(self.op.disks, None)
12448 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12451 if self.op.os_name and not self.op.force:
12452 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12453 self.op.force_variant)
12454 instance_os = self.op.os_name
12456 instance_os = instance.os
12458 assert not (self.op.disk_template and self.op.disks), \
12459 "Can't modify disk template and apply disk changes at the same time"
12461 if self.op.disk_template:
12462 if instance.disk_template == self.op.disk_template:
12463 raise errors.OpPrereqError("Instance already has disk template %s" %
12464 instance.disk_template, errors.ECODE_INVAL)
12466 if (instance.disk_template,
12467 self.op.disk_template) not in self._DISK_CONVERSIONS:
12468 raise errors.OpPrereqError("Unsupported disk template conversion from"
12469 " %s to %s" % (instance.disk_template,
12470 self.op.disk_template),
12471 errors.ECODE_INVAL)
12472 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12473 msg="cannot change disk template")
12474 if self.op.disk_template in constants.DTS_INT_MIRROR:
12475 if self.op.remote_node == pnode:
12476 raise errors.OpPrereqError("Given new secondary node %s is the same"
12477 " as the primary node of the instance" %
12478 self.op.remote_node, errors.ECODE_STATE)
12479 _CheckNodeOnline(self, self.op.remote_node)
12480 _CheckNodeNotDrained(self, self.op.remote_node)
12481 # FIXME: here we assume that the old instance type is DT_PLAIN
12482 assert instance.disk_template == constants.DT_PLAIN
12483 disks = [{constants.IDISK_SIZE: d.size,
12484 constants.IDISK_VG: d.logical_id[0]}
12485 for d in instance.disks]
12486 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12487 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12489 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12490 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12491 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12492 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12493 ignore=self.op.ignore_ipolicy)
12494 if pnode_info.group != snode_info.group:
12495 self.LogWarning("The primary and secondary nodes are in two"
12496 " different node groups; the disk parameters"
12497 " from the first disk's node group will be"
12500 # hvparams processing
12501 if self.op.hvparams:
12502 hv_type = instance.hypervisor
12503 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12504 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12505 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12508 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12509 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12510 self.hv_proposed = self.hv_new = hv_new # the new actual values
12511 self.hv_inst = i_hvdict # the new dict (without defaults)
12513 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12515 self.hv_new = self.hv_inst = {}
12517 # beparams processing
12518 if self.op.beparams:
12519 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12521 objects.UpgradeBeParams(i_bedict)
12522 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12523 be_new = cluster.SimpleFillBE(i_bedict)
12524 self.be_proposed = self.be_new = be_new # the new actual values
12525 self.be_inst = i_bedict # the new dict (without defaults)
12527 self.be_new = self.be_inst = {}
12528 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12529 be_old = cluster.FillBE(instance)
12531 # CPU param validation -- checking every time a parameter is
12532 # changed to cover all cases where either CPU mask or vcpus have
12534 if (constants.BE_VCPUS in self.be_proposed and
12535 constants.HV_CPU_MASK in self.hv_proposed):
12537 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12538 # Verify mask is consistent with number of vCPUs. Can skip this
12539 # test if only 1 entry in the CPU mask, which means same mask
12540 # is applied to all vCPUs.
12541 if (len(cpu_list) > 1 and
12542 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12543 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12545 (self.be_proposed[constants.BE_VCPUS],
12546 self.hv_proposed[constants.HV_CPU_MASK]),
12547 errors.ECODE_INVAL)
12549 # Only perform this test if a new CPU mask is given
12550 if constants.HV_CPU_MASK in self.hv_new:
12551 # Calculate the largest CPU number requested
12552 max_requested_cpu = max(map(max, cpu_list))
12553 # Check that all of the instance's nodes have enough physical CPUs to
12554 # satisfy the requested CPU mask
12555 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12556 max_requested_cpu + 1, instance.hypervisor)
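# Example with a hypothetical mask: HV_CPU_MASK "1:2:4" is assumed to parse
# into [[1], [2], [4]], i.e. three vCPUs, and with max requested CPU 4 every
# node must expose at least 5 physical CPUs.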
12558 # osparams processing
12559 if self.op.osparams:
12560 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12561 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12562 self.os_inst = i_osdict # the new dict (without defaults)
12568 #TODO(dynmem): do the appropriate check involving MINMEM
12569 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12570 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12571 mem_check_list = [pnode]
12572 if be_new[constants.BE_AUTO_BALANCE]:
12573 # either we changed auto_balance to yes or it was from before
12574 mem_check_list.extend(instance.secondary_nodes)
12575 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12576 instance.hypervisor)
12577 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12578 [instance.hypervisor])
12579 pninfo = nodeinfo[pnode]
12580 msg = pninfo.fail_msg
12582 # Assume the primary node is unreachable and go ahead
12583 self.warn.append("Can't get info from primary node %s: %s" %
12586 (_, _, (pnhvinfo, )) = pninfo.payload
12587 if not isinstance(pnhvinfo.get("memory_free", None), int):
12588 self.warn.append("Node data from primary node %s doesn't contain"
12589 " free memory information" % pnode)
12590 elif instance_info.fail_msg:
12591 self.warn.append("Can't get instance runtime information: %s" %
12592 instance_info.fail_msg)
12594 if instance_info.payload:
12595 current_mem = int(instance_info.payload["memory"])
12597 # Assume instance not running
12598 # (there is a slight race condition here, but it's not very
12599 # probable, and we have no other way to check)
12600 # TODO: Describe race condition
12602 #TODO(dynmem): do the appropriate check involving MINMEM
12603 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12604 pnhvinfo["memory_free"])
12606 raise errors.OpPrereqError("This change will prevent the instance"
12607 " from starting, due to %d MB of memory"
12608 " missing on its primary node" %
12610 errors.ECODE_NORES)
12612 if be_new[constants.BE_AUTO_BALANCE]:
12613 for node, nres in nodeinfo.items():
12614 if node not in instance.secondary_nodes:
12616 nres.Raise("Can't get info from secondary node %s" % node,
12617 prereq=True, ecode=errors.ECODE_STATE)
12618 (_, _, (nhvinfo, )) = nres.payload
12619 if not isinstance(nhvinfo.get("memory_free", None), int):
12620 raise errors.OpPrereqError("Secondary node %s didn't return free"
12621 " memory information" % node,
12622 errors.ECODE_STATE)
12623 #TODO(dynmem): do the appropriate check involving MINMEM
12624 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12625 raise errors.OpPrereqError("This change will prevent the instance"
12626 " from failover to its secondary node"
12627 " %s, due to not enough memory" % node,
12628 errors.ECODE_STATE)
12630 if self.op.runtime_mem:
12631 remote_info = self.rpc.call_instance_info(instance.primary_node,
12633 instance.hypervisor)
12634 remote_info.Raise("Error checking node %s" % instance.primary_node)
12635 if not remote_info.payload: # not running already
12636 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12637 errors.ECODE_STATE)
12639 current_memory = remote_info.payload["memory"]
12640 if (not self.op.force and
12641 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12642 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12643 raise errors.OpPrereqError("Instance %s must have memory between %d"
12644 " and %d MB of memory unless --force is"
12645 " given" % (instance.name,
12646 self.be_proposed[constants.BE_MINMEM],
12647 self.be_proposed[constants.BE_MAXMEM]),
12648 errors.ECODE_INVAL)
12650 if self.op.runtime_mem > current_memory:
12651 _CheckNodeFreeMemory(self, instance.primary_node,
12652 "ballooning memory for instance %s" %
12654 self.op.runtime_mem - current_memory,
12655 instance.hypervisor)
12657 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12658 raise errors.OpPrereqError("Disk operations not supported for"
12659 " diskless instances",
12660 errors.ECODE_INVAL)
12662 def _PrepareNicCreate(_, params, private):
12663 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12664 return (None, None)
12666 def _PrepareNicMod(_, nic, params, private):
12667 self._PrepareNicModification(params, private, nic.ip,
12668 nic.nicparams, cluster, pnode)
12671 # Verify NIC changes (operating on copy)
12672 nics = instance.nics[:]
12673 ApplyContainerMods("NIC", nics, None, self.nicmod,
12674 _PrepareNicCreate, _PrepareNicMod, None)
12675 if len(nics) > constants.MAX_NICS:
12676 raise errors.OpPrereqError("Instance has too many network interfaces"
12677 " (%d), cannot add more" % constants.MAX_NICS,
12678 errors.ECODE_STATE)
12680 # Verify disk changes (operating on a copy)
12681 disks = instance.disks[:]
12682 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12683 if len(disks) > constants.MAX_DISKS:
12684 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12685 " more" % constants.MAX_DISKS,
12686 errors.ECODE_STATE)
12688 if self.op.offline is not None:
12689 if self.op.offline:
12690 msg = "can't change to offline"
12691 else:
12692 msg = "can't change to online"
12693 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12695 # Pre-compute NIC changes (necessary to use result in hooks)
12696 self._nic_chgdesc = []
12697 if self.nicmod:
12698 # Operate on copies as this is still in prereq
12699 nics = [nic.Copy() for nic in instance.nics]
12700 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12701 self._CreateNewNic, self._ApplyNicMods, None)
12702 self._new_nics = nics
12703 else:
12704 self._new_nics = None
12706 def _ConvertPlainToDrbd(self, feedback_fn):
12707 """Converts an instance from plain to drbd.
12710 feedback_fn("Converting template to drbd")
12711 instance = self.instance
12712 pnode = instance.primary_node
12713 snode = self.op.remote_node
12715 assert instance.disk_template == constants.DT_PLAIN
12717 # create a fake disk info for _GenerateDiskTemplate
12718 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12719 constants.IDISK_VG: d.logical_id[0]}
12720 for d in instance.disks]
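# Illustrative example (hypothetical values): a plain instance with a single
# 10240 MB read-write LV in volume group "xenvg" would produce
#   disk_info == [{constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw",
#                  constants.IDISK_VG: "xenvg"}]
# which is just enough information for _GenerateDiskTemplate below.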
12721 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12722 instance.name, pnode, [snode],
12723 disk_info, None, None, 0, feedback_fn,
12724 self.diskparams)
12725 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12726 self.diskparams)
12727 info = _GetInstanceInfoText(instance)
12728 feedback_fn("Creating additional volumes...")
12729 # first, create the missing data and meta devices
12730 for disk in anno_disks:
12731 # unfortunately this is... not too nice
12732 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12733 info, True)
12734 for child in disk.children:
12735 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12736 # at this stage, all new LVs have been created, we can rename the
12737 # old ones
12738 feedback_fn("Renaming original volumes...")
12739 rename_list = [(o, n.children[0].logical_id)
12740 for (o, n) in zip(instance.disks, new_disks)]
12741 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12742 result.Raise("Failed to rename original LVs")
12744 feedback_fn("Initializing DRBD devices...")
12745 # all child devices are in place, we can now create the DRBD devices
12746 for disk in anno_disks:
12747 for node in [pnode, snode]:
12748 f_create = node == pnode
12749 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12751 # at this point, the instance has been modified
12752 instance.disk_template = constants.DT_DRBD8
12753 instance.disks = new_disks
12754 self.cfg.Update(instance, feedback_fn)
12756 # Release node locks while waiting for sync
12757 _ReleaseLocks(self, locking.LEVEL_NODE)
12759 # disks are created, waiting for sync
12760 disk_abort = not _WaitForSync(self, instance,
12761 oneshot=not self.op.wait_for_sync)
12762 if disk_abort:
12763 raise errors.OpExecError("There are some degraded disks for"
12764 " this instance, please cleanup manually")
12766 # Node resource locks will be released by caller
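# In short: the new DRBD meta and data LVs are created first, the original
# plain LVs are renamed into place as the DRBD data children, the DRBD
# devices are assembled on both nodes, and only then do we wait for the
# initial resync.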
12768 def _ConvertDrbdToPlain(self, feedback_fn):
12769 """Converts an instance from drbd to plain.
12772 instance = self.instance
12774 assert len(instance.secondary_nodes) == 1
12775 assert instance.disk_template == constants.DT_DRBD8
12777 pnode = instance.primary_node
12778 snode = instance.secondary_nodes[0]
12779 feedback_fn("Converting template to plain")
12781 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12782 new_disks = [d.children[0] for d in instance.disks]
12784 # copy over size and mode
12785 for parent, child in zip(old_disks, new_disks):
12786 child.size = parent.size
12787 child.mode = parent.mode
12789 # this is a DRBD disk, return its port to the pool
12790 # NOTE: this must be done right before the call to cfg.Update!
12791 for disk in old_disks:
12792 tcp_port = disk.logical_id[2]
12793 self.cfg.AddTcpUdpPort(tcp_port)
12795 # update instance structure
12796 instance.disks = new_disks
12797 instance.disk_template = constants.DT_PLAIN
12798 self.cfg.Update(instance, feedback_fn)
12800 # Release locks in case removing disks takes a while
12801 _ReleaseLocks(self, locking.LEVEL_NODE)
12803 feedback_fn("Removing volumes on the secondary node...")
12804 for disk in old_disks:
12805 self.cfg.SetDiskID(disk, snode)
12806 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12807 if msg:
12808 self.LogWarning("Could not remove block device %s on node %s,"
12809 " continuing anyway: %s", disk.iv_name, snode, msg)
12811 feedback_fn("Removing unneeded volumes on the primary node...")
12812 for idx, disk in enumerate(old_disks):
12813 meta = disk.children[1]
12814 self.cfg.SetDiskID(meta, pnode)
12815 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12816 if msg:
12817 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12818 " continuing anyway: %s", idx, pnode, msg)
12820 def _CreateNewDisk(self, idx, params, _):
12821 """Creates a new disk.
12824 instance = self.instance
12827 if instance.disk_template in constants.DTS_FILEBASED:
12828 (file_driver, file_path) = instance.disks[0].logical_id
12829 file_path = os.path.dirname(file_path)
12830 else:
12831 file_driver = file_path = None
12833 disk = \
12834 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12835 instance.primary_node, instance.secondary_nodes,
12836 [params], file_path, file_driver, idx,
12837 self.Log, self.diskparams)[0]
12839 info = _GetInstanceInfoText(instance)
12841 logging.info("Creating volume %s for instance %s",
12842 disk.iv_name, instance.name)
12843 # Note: this needs to be kept in sync with _CreateDisks
12845 for node in instance.all_nodes:
12846 f_create = (node == instance.primary_node)
12847 try:
12848 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12849 except errors.OpExecError, err:
12850 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12851 disk.iv_name, disk, node, err)
12854 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12858 def _ModifyDisk(idx, disk, params, _):
12859 """Modifies a disk.
12862 disk.mode = params[constants.IDISK_MODE]
12865 ("disk.mode/%d" % idx, disk.mode),
12868 def _RemoveDisk(self, idx, root, _):
12872 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12873 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12874 self.cfg.SetDiskID(disk, node)
12875 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12876 if msg:
12877 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12878 " continuing anyway", idx, node, msg)
12880 # if this is a DRBD disk, return its port to the pool
12881 if root.dev_type in constants.LDS_DRBD:
12882 self.cfg.AddTcpUdpPort(root.logical_id[2])
12885 def _CreateNewNic(idx, params, private):
12886 """Creates data structure for a new network interface.
12889 mac = params[constants.INIC_MAC]
12890 ip = params.get(constants.INIC_IP, None)
12891 nicparams = private.params
12893 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12895 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12896 (mac, ip, private.filled[constants.NIC_MODE],
12897 private.filled[constants.NIC_LINK])),
12901 def _ApplyNicMods(idx, nic, params, private):
12902 """Modifies a network interface.
12906 changes = []
12907 for key in [constants.INIC_MAC, constants.INIC_IP]:
12908 if key in params:
12909 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12910 setattr(nic, key, params[key])
12913 nic.nicparams = private.params
12915 for (key, val) in params.items():
12916 changes.append(("nic.%s/%d" % (key, idx), val))
12918 return changes
12920 def Exec(self, feedback_fn):
12921 """Modifies an instance.
12923 All parameters take effect only at the next restart of the instance.
12926 # Process here the warnings from CheckPrereq, as we don't have a
12927 # feedback_fn there.
12928 # TODO: Replace with self.LogWarning
12929 for warn in self.warn:
12930 feedback_fn("WARNING: %s" % warn)
12932 assert ((self.op.disk_template is None) ^
12933 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12934 "Not owning any node resource locks"
12936 result = []
12937 instance = self.instance
12940 if self.op.runtime_mem:
12941 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12943 self.op.runtime_mem)
12944 rpcres.Raise("Cannot modify instance runtime memory")
12945 result.append(("runtime_memory", self.op.runtime_mem))
12947 # Apply disk changes
12948 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12949 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12950 _UpdateIvNames(0, instance.disks)
12952 if self.op.disk_template:
12954 check_nodes = set(instance.all_nodes)
12955 if self.op.remote_node:
12956 check_nodes.add(self.op.remote_node)
12957 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12958 owned = self.owned_locks(level)
12959 assert not (check_nodes - owned), \
12960 ("Not owning the correct locks, owning %r, expected at least %r" %
12961 (owned, check_nodes))
12963 r_shut = _ShutdownInstanceDisks(self, instance)
12964 if not r_shut:
12965 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12966 " proceed with disk template conversion")
12967 mode = (instance.disk_template, self.op.disk_template)
12968 try:
12969 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12970 except:
12971 self.cfg.ReleaseDRBDMinors(instance.name)
12972 raise
12973 result.append(("disk_template", self.op.disk_template))
12975 assert instance.disk_template == self.op.disk_template, \
12976 ("Expected disk template '%s', found '%s'" %
12977 (self.op.disk_template, instance.disk_template))
12979 # Release node and resource locks if there are any (they might already have
12980 # been released during disk conversion)
12981 _ReleaseLocks(self, locking.LEVEL_NODE)
12982 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12984 # Apply NIC changes
12985 if self._new_nics is not None:
12986 instance.nics = self._new_nics
12987 result.extend(self._nic_chgdesc)
12990 if self.op.hvparams:
12991 instance.hvparams = self.hv_inst
12992 for key, val in self.op.hvparams.iteritems():
12993 result.append(("hv/%s" % key, val))
12996 if self.op.beparams:
12997 instance.beparams = self.be_inst
12998 for key, val in self.op.beparams.iteritems():
12999 result.append(("be/%s" % key, val))
13002 if self.op.os_name:
13003 instance.os = self.op.os_name
13006 if self.op.osparams:
13007 instance.osparams = self.os_inst
13008 for key, val in self.op.osparams.iteritems():
13009 result.append(("os/%s" % key, val))
13011 if self.op.offline is None:
13012 # Ignore
13013 pass
13014 elif self.op.offline:
13015 # Mark instance as offline
13016 self.cfg.MarkInstanceOffline(instance.name)
13017 result.append(("admin_state", constants.ADMINST_OFFLINE))
13018 else:
13019 # Mark instance as online, but stopped
13020 self.cfg.MarkInstanceDown(instance.name)
13021 result.append(("admin_state", constants.ADMINST_DOWN))
13023 self.cfg.Update(instance, feedback_fn)
13025 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13026 self.owned_locks(locking.LEVEL_NODE)), \
13027 "All node locks should have been released by now"
13031 _DISK_CONVERSIONS = {
13032 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13033 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13037 class LUInstanceChangeGroup(LogicalUnit):
13038 HPATH = "instance-change-group"
13039 HTYPE = constants.HTYPE_INSTANCE
13042 def ExpandNames(self):
13043 self.share_locks = _ShareAll()
13044 self.needed_locks = {
13045 locking.LEVEL_NODEGROUP: [],
13046 locking.LEVEL_NODE: [],
13049 self._ExpandAndLockInstance()
13051 if self.op.target_groups:
13052 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13053 self.op.target_groups)
13055 self.req_target_uuids = None
13057 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13059 def DeclareLocks(self, level):
13060 if level == locking.LEVEL_NODEGROUP:
13061 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13063 if self.req_target_uuids:
13064 lock_groups = set(self.req_target_uuids)
13066 # Lock all groups used by instance optimistically; this requires going
13067 # via the node before it's locked, requiring verification later on
13068 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13069 lock_groups.update(instance_groups)
13071 # No target groups, need to lock all of them
13072 lock_groups = locking.ALL_SET
13074 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13076 elif level == locking.LEVEL_NODE:
13077 if self.req_target_uuids:
13078 # Lock all nodes used by instances
13079 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13080 self._LockInstancesNodes()
13082 # Lock all nodes in all potential target groups
13083 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13084 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13085 member_nodes = [node_name
13086 for group in lock_groups
13087 for node_name in self.cfg.GetNodeGroup(group).members]
13088 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13090 # Lock all nodes as all groups are potential targets
13091 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
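# Note: the group locks computed here are only an optimistic guess based on
# unlocked configuration data; CheckPrereq below re-checks the instance's
# node groups once the locks are actually held.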
13093 def CheckPrereq(self):
13094 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13095 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13096 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13098 assert (self.req_target_uuids is None or
13099 owned_groups.issuperset(self.req_target_uuids))
13100 assert owned_instances == set([self.op.instance_name])
13102 # Get instance information
13103 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13105 # Check if node groups for locked instance are still correct
13106 assert owned_nodes.issuperset(self.instance.all_nodes), \
13107 ("Instance %s's nodes changed while we kept the lock" %
13108 self.op.instance_name)
13110 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13113 if self.req_target_uuids:
13114 # User requested specific target groups
13115 self.target_uuids = frozenset(self.req_target_uuids)
13117 # All groups except those used by the instance are potential targets
13118 self.target_uuids = owned_groups - inst_groups
13120 conflicting_groups = self.target_uuids & inst_groups
13121 if conflicting_groups:
13122 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13123 " used by the instance '%s'" %
13124 (utils.CommaJoin(conflicting_groups),
13125 self.op.instance_name),
13126 errors.ECODE_INVAL)
13128 if not self.target_uuids:
13129 raise errors.OpPrereqError("There are no possible target groups",
13130 errors.ECODE_INVAL)
13132 def BuildHooksEnv(self):
13133 """Build hooks env.
13136 assert self.target_uuids
13139 "TARGET_GROUPS": " ".join(self.target_uuids),
13142 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13146 def BuildHooksNodes(self):
13147 """Build hooks nodes.
13150 mn = self.cfg.GetMasterNode()
13151 return ([mn], [mn])
13153 def Exec(self, feedback_fn):
13154 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13156 assert instances == [self.op.instance_name], "Instance not locked"
13158 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13159 instances=instances, target_groups=list(self.target_uuids))
13161 ial.Run(self.op.iallocator)
13163 if not ial.success:
13164 raise errors.OpPrereqError("Can't compute solution for changing group of"
13165 " instance '%s' using iallocator '%s': %s" %
13166 (self.op.instance_name, self.op.iallocator,
13167 ial.info),
13168 errors.ECODE_NORES)
13170 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13172 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13173 " instance '%s'", len(jobs), self.op.instance_name)
13175 return ResultWithJobs(jobs)
13178 class LUBackupQuery(NoHooksLU):
13179 """Query the exports list
13184 def CheckArguments(self):
13185 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13186 ["node", "export"], self.op.use_locking)
13188 def ExpandNames(self):
13189 self.expq.ExpandNames(self)
13191 def DeclareLocks(self, level):
13192 self.expq.DeclareLocks(self, level)
13194 def Exec(self, feedback_fn):
13195 result = {}
13197 for (node, expname) in self.expq.OldStyleQuery(self):
13198 if expname is None:
13199 result[node] = False
13200 else:
13201 result.setdefault(node, []).append(expname)
13203 return result
13206 class _ExportQuery(_QueryBase):
13207 FIELDS = query.EXPORT_FIELDS
13209 #: The node name is not a unique key for this query
13210 SORT_FIELD = "node"
13212 def ExpandNames(self, lu):
13213 lu.needed_locks = {}
13215 # The following variables interact with _QueryBase._GetNames
13216 if self.names:
13217 self.wanted = _GetWantedNodes(lu, self.names)
13218 else:
13219 self.wanted = locking.ALL_SET
13221 self.do_locking = self.use_locking
13223 if self.do_locking:
13224 lu.share_locks = _ShareAll()
13225 lu.needed_locks = {
13226 locking.LEVEL_NODE: self.wanted,
13229 def DeclareLocks(self, lu, level):
13232 def _GetQueryData(self, lu):
13233 """Computes the list of nodes and their attributes.
13236 # Locking is not used
13238 assert not (compat.any(lu.glm.is_owned(level)
13239 for level in locking.LEVELS
13240 if level != locking.LEVEL_CLUSTER) or
13241 self.do_locking or self.use_locking)
13243 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13245 result = []
13247 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13248 if nres.fail_msg:
13249 result.append((node, None))
13250 else:
13251 result.extend((node, expname) for expname in nres.payload)
13253 return result
13256 class LUBackupPrepare(NoHooksLU):
13257 """Prepares an instance for an export and returns useful information.
13262 def ExpandNames(self):
13263 self._ExpandAndLockInstance()
13265 def CheckPrereq(self):
13266 """Check prerequisites.
13269 instance_name = self.op.instance_name
13271 self.instance = self.cfg.GetInstanceInfo(instance_name)
13272 assert self.instance is not None, \
13273 "Cannot retrieve locked instance %s" % self.op.instance_name
13274 _CheckNodeOnline(self, self.instance.primary_node)
13276 self._cds = _GetClusterDomainSecret()
13278 def Exec(self, feedback_fn):
13279 """Prepares an instance for an export.
13282 instance = self.instance
13284 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13285 salt = utils.GenerateSecret(8)
13287 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13288 result = self.rpc.call_x509_cert_create(instance.primary_node,
13289 constants.RIE_CERT_VALIDITY)
13290 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13292 (name, cert_pem) = result.payload
13294 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13298 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13299 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13301 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13307 class LUBackupExport(LogicalUnit):
13308 """Export an instance to an image in the cluster.
13311 HPATH = "instance-export"
13312 HTYPE = constants.HTYPE_INSTANCE
13315 def CheckArguments(self):
13316 """Check the arguments.
13319 self.x509_key_name = self.op.x509_key_name
13320 self.dest_x509_ca_pem = self.op.destination_x509_ca
13322 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13323 if not self.x509_key_name:
13324 raise errors.OpPrereqError("Missing X509 key name for encryption",
13325 errors.ECODE_INVAL)
13327 if not self.dest_x509_ca_pem:
13328 raise errors.OpPrereqError("Missing destination X509 CA",
13329 errors.ECODE_INVAL)
13331 def ExpandNames(self):
13332 self._ExpandAndLockInstance()
13334 # Lock all nodes for local exports
13335 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13336 # FIXME: lock only instance primary and destination node
13338 # Sad but true, for now we have to lock all nodes, as we don't know where
13339 # the previous export might be, and in this LU we search for it and
13340 # remove it from its current node. In the future we could fix this by:
13341 # - making a tasklet to search (share-lock all), then create the
13342 # new one, then one to remove, after
13343 # - removing the removal operation altogether
13344 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13346 def DeclareLocks(self, level):
13347 """Last minute lock declaration."""
13348 # All nodes are locked anyway, so nothing to do here.
13350 def BuildHooksEnv(self):
13351 """Build hooks env.
13353 This will run on the master, primary node and target node.
13357 "EXPORT_MODE": self.op.mode,
13358 "EXPORT_NODE": self.op.target_node,
13359 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13360 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13361 # TODO: Generic function for boolean env variables
13362 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13363 }
13365 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13367 return env
13369 def BuildHooksNodes(self):
13370 """Build hooks nodes.
13373 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13375 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13376 nl.append(self.op.target_node)
13378 return (nl, nl)
13380 def CheckPrereq(self):
13381 """Check prerequisites.
13383 This checks that the instance and node names are valid.
13386 instance_name = self.op.instance_name
13388 self.instance = self.cfg.GetInstanceInfo(instance_name)
13389 assert self.instance is not None, \
13390 "Cannot retrieve locked instance %s" % self.op.instance_name
13391 _CheckNodeOnline(self, self.instance.primary_node)
13393 if (self.op.remove_instance and
13394 self.instance.admin_state == constants.ADMINST_UP and
13395 not self.op.shutdown):
13396 raise errors.OpPrereqError("Can not remove instance without shutting it"
13397 " down before", errors.ECODE_STATE)
13399 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13400 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13401 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13402 assert self.dst_node is not None
13404 _CheckNodeOnline(self, self.dst_node.name)
13405 _CheckNodeNotDrained(self, self.dst_node.name)
13408 self.dest_disk_info = None
13409 self.dest_x509_ca = None
13411 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13412 self.dst_node = None
13414 if len(self.op.target_node) != len(self.instance.disks):
13415 raise errors.OpPrereqError(("Received destination information for %s"
13416 " disks, but instance %s has %s disks") %
13417 (len(self.op.target_node), instance_name,
13418 len(self.instance.disks)),
13419 errors.ECODE_INVAL)
13421 cds = _GetClusterDomainSecret()
13423 # Check X509 key name
13425 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13426 except (TypeError, ValueError), err:
13427 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13429 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13430 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13431 errors.ECODE_INVAL)
13433 # Load and verify CA
13435 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13436 except OpenSSL.crypto.Error, err:
13437 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13438 (err, ), errors.ECODE_INVAL)
13440 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13441 if errcode is not None:
13442 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13443 (msg, ), errors.ECODE_INVAL)
13445 self.dest_x509_ca = cert
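# At this point the remote export credentials have been validated: the X509
# key name was checked against an HMAC computed with the cluster domain
# secret, and the destination CA was loaded from a signed PEM and verified
# before being stored for use during Exec.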
13447 # Verify target information
13448 disk_info = []
13449 for idx, disk_data in enumerate(self.op.target_node):
13451 (host, port, magic) = \
13452 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13453 except errors.GenericError, err:
13454 raise errors.OpPrereqError("Target info for disk %s: %s" %
13455 (idx, err), errors.ECODE_INVAL)
13457 disk_info.append((host, port, magic))
13459 assert len(disk_info) == len(self.op.target_node)
13460 self.dest_disk_info = disk_info
13462 else:
13463 raise errors.ProgrammerError("Unhandled export mode %r" %
13464 self.op.mode)
13466 # instance disk type verification
13467 # TODO: Implement export support for file-based disks
13468 for disk in self.instance.disks:
13469 if disk.dev_type == constants.LD_FILE:
13470 raise errors.OpPrereqError("Export not supported for instances with"
13471 " file-based disks", errors.ECODE_INVAL)
13473 def _CleanupExports(self, feedback_fn):
13474 """Removes exports of current instance from all other nodes.
13476 If an instance in a cluster with nodes A..D was exported to node C, its
13477 exports will be removed from the nodes A, B and D.
13480 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13482 nodelist = self.cfg.GetNodeList()
13483 nodelist.remove(self.dst_node.name)
13485 # on one-node clusters nodelist will be empty after the removal
13486 # if we proceed the backup would be removed because OpBackupQuery
13487 # substitutes an empty list with the full cluster node list.
13488 iname = self.instance.name
13489 if nodelist:
13490 feedback_fn("Removing old exports for instance %s" % iname)
13491 exportlist = self.rpc.call_export_list(nodelist)
13492 for node in exportlist:
13493 if exportlist[node].fail_msg:
13494 continue
13495 if iname in exportlist[node].payload:
13496 msg = self.rpc.call_export_remove(node, iname).fail_msg
13497 if msg:
13498 self.LogWarning("Could not remove older export for instance %s"
13499 " on node %s: %s", iname, node, msg)
13501 def Exec(self, feedback_fn):
13502 """Export an instance to an image in the cluster.
13505 assert self.op.mode in constants.EXPORT_MODES
13507 instance = self.instance
13508 src_node = instance.primary_node
13510 if self.op.shutdown:
13511 # shutdown the instance, but not the disks
13512 feedback_fn("Shutting down instance %s" % instance.name)
13513 result = self.rpc.call_instance_shutdown(src_node, instance,
13514 self.op.shutdown_timeout)
13515 # TODO: Maybe ignore failures if ignore_remove_failures is set
13516 result.Raise("Could not shutdown instance %s on"
13517 " node %s" % (instance.name, src_node))
13519 # set the disks ID correctly since call_instance_start needs the
13520 # correct drbd minor to create the symlinks
13521 for disk in instance.disks:
13522 self.cfg.SetDiskID(disk, src_node)
13524 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13526 if activate_disks:
13527 # Activate the instance disks if we're exporting a stopped instance
13528 feedback_fn("Activating disks for %s" % instance.name)
13529 _StartInstanceDisks(self, instance, None)
13532 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13535 helper.CreateSnapshots()
13537 if (self.op.shutdown and
13538 instance.admin_state == constants.ADMINST_UP and
13539 not self.op.remove_instance):
13540 assert not activate_disks
13541 feedback_fn("Starting instance %s" % instance.name)
13542 result = self.rpc.call_instance_start(src_node,
13543 (instance, None, None), False)
13544 msg = result.fail_msg
13545 if msg:
13546 feedback_fn("Failed to start instance: %s" % msg)
13547 _ShutdownInstanceDisks(self, instance)
13548 raise errors.OpExecError("Could not start instance: %s" % msg)
13550 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13551 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13552 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13553 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13554 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13556 (key_name, _, _) = self.x509_key_name
13559 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13562 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13563 key_name, dest_ca_pem,
13568 # Check for backwards compatibility
13569 assert len(dresults) == len(instance.disks)
13570 assert compat.all(isinstance(i, bool) for i in dresults), \
13571 "Not all results are boolean: %r" % dresults
13575 feedback_fn("Deactivating disks for %s" % instance.name)
13576 _ShutdownInstanceDisks(self, instance)
13578 if not (compat.all(dresults) and fin_resu):
13579 failures = []
13580 if not fin_resu:
13581 failures.append("export finalization")
13582 if not compat.all(dresults):
13583 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13584 if not dsk)
13585 failures.append("disk export: disk(s) %s" % fdsk)
13587 raise errors.OpExecError("Export failed, errors in %s" %
13588 utils.CommaJoin(failures))
13590 # At this point, the export was successful, we can cleanup/finish
13592 # Remove instance if requested
13593 if self.op.remove_instance:
13594 feedback_fn("Removing instance %s" % instance.name)
13595 _RemoveInstance(self, feedback_fn, instance,
13596 self.op.ignore_remove_failures)
13598 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13599 self._CleanupExports(feedback_fn)
13601 return fin_resu, dresults
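# Overall export flow: optionally shut the instance down, snapshot its disks,
# restart it if it was running and is not being removed, transfer the data
# (locally or to the remote, encrypted destination), then remove the instance
# and/or stale exports on other nodes if requested.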
13604 class LUBackupRemove(NoHooksLU):
13605 """Remove exports related to the named instance.
13610 def ExpandNames(self):
13611 self.needed_locks = {}
13612 # We need all nodes to be locked in order for RemoveExport to work, but we
13613 # don't need to lock the instance itself, as nothing will happen to it (and
13614 # we can remove exports also for a removed instance)
13615 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13617 def Exec(self, feedback_fn):
13618 """Remove any export.
13621 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13622 # If the instance was not found we'll try with the name that was passed in.
13623 # This will only work if it was an FQDN, though.
13624 fqdn_warn = False
13625 if not instance_name:
13626 fqdn_warn = True
13627 instance_name = self.op.instance_name
13629 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13630 exportlist = self.rpc.call_export_list(locked_nodes)
13631 found = False
13632 for node in exportlist:
13633 msg = exportlist[node].fail_msg
13634 if msg:
13635 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13636 continue
13637 if instance_name in exportlist[node].payload:
13638 found = True
13639 result = self.rpc.call_export_remove(node, instance_name)
13640 msg = result.fail_msg
13641 if msg:
13642 logging.error("Could not remove export for instance %s"
13643 " on node %s: %s", instance_name, node, msg)
13645 if fqdn_warn and not found:
13646 feedback_fn("Export not found. If trying to remove an export belonging"
13647 " to a deleted instance please use its Fully Qualified"
13651 class LUGroupAdd(LogicalUnit):
13652 """Logical unit for creating node groups.
13655 HPATH = "group-add"
13656 HTYPE = constants.HTYPE_GROUP
13659 def ExpandNames(self):
13660 # We need the new group's UUID here so that we can create and acquire the
13661 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13662 # that it should not check whether the UUID exists in the configuration.
13663 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13664 self.needed_locks = {}
13665 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13667 def CheckPrereq(self):
13668 """Check prerequisites.
13670 This checks that the given group name is not an existing node group
13674 try:
13675 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13676 except errors.OpPrereqError:
13677 pass
13678 else:
13679 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13680 " node group (UUID: %s)" %
13681 (self.op.group_name, existing_uuid),
13682 errors.ECODE_EXISTS)
13684 if self.op.ndparams:
13685 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13687 if self.op.hv_state:
13688 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13690 self.new_hv_state = None
13692 if self.op.disk_state:
13693 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13695 self.new_disk_state = None
13697 if self.op.diskparams:
13698 for templ in constants.DISK_TEMPLATES:
13699 if templ in self.op.diskparams:
13700 utils.ForceDictType(self.op.diskparams[templ],
13701 constants.DISK_DT_TYPES)
13702 self.new_diskparams = self.op.diskparams
13703 try:
13704 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13705 except errors.OpPrereqError, err:
13706 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
13707 errors.ECODE_INVAL)
13708 else:
13709 self.new_diskparams = {}
13711 if self.op.ipolicy:
13712 cluster = self.cfg.GetClusterInfo()
13713 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13714 try:
13715 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13716 except errors.ConfigurationError, err:
13717 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13718 errors.ECODE_INVAL)
13720 def BuildHooksEnv(self):
13721 """Build hooks env.
13725 "GROUP_NAME": self.op.group_name,
13728 def BuildHooksNodes(self):
13729 """Build hooks nodes.
13732 mn = self.cfg.GetMasterNode()
13733 return ([mn], [mn])
13735 def Exec(self, feedback_fn):
13736 """Add the node group to the cluster.
13739 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13740 uuid=self.group_uuid,
13741 alloc_policy=self.op.alloc_policy,
13742 ndparams=self.op.ndparams,
13743 diskparams=self.new_diskparams,
13744 ipolicy=self.op.ipolicy,
13745 hv_state_static=self.new_hv_state,
13746 disk_state_static=self.new_disk_state)
13748 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13749 del self.remove_locks[locking.LEVEL_NODEGROUP]
13752 class LUGroupAssignNodes(NoHooksLU):
13753 """Logical unit for assigning nodes to groups.
13758 def ExpandNames(self):
13759 # These raise errors.OpPrereqError on their own:
13760 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13761 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13763 # We want to lock all the affected nodes and groups. We have readily
13764 # available the list of nodes, and the *destination* group. To gather the
13765 # list of "source" groups, we need to fetch node information later on.
13766 self.needed_locks = {
13767 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13768 locking.LEVEL_NODE: self.op.nodes,
13771 def DeclareLocks(self, level):
13772 if level == locking.LEVEL_NODEGROUP:
13773 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13775 # Try to get all affected nodes' groups without having the group or node
13776 # lock yet. Needs verification later in the code flow.
13777 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13779 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13781 def CheckPrereq(self):
13782 """Check prerequisites.
13785 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13786 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13787 frozenset(self.op.nodes))
13789 expected_locks = (set([self.group_uuid]) |
13790 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13791 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13792 if actual_locks != expected_locks:
13793 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13794 " current groups are '%s', used to be '%s'" %
13795 (utils.CommaJoin(expected_locks),
13796 utils.CommaJoin(actual_locks)))
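# A mismatch here means some node changed groups between the optimistic
# lookup in DeclareLocks and the moment the group locks were acquired;
# aborting is safer than operating on stale group information.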
13798 self.node_data = self.cfg.GetAllNodesInfo()
13799 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13800 instance_data = self.cfg.GetAllInstancesInfo()
13802 if self.group is None:
13803 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13804 (self.op.group_name, self.group_uuid))
13806 (new_splits, previous_splits) = \
13807 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13808 for node in self.op.nodes],
13809 self.node_data, instance_data)
13811 if new_splits:
13812 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13814 if not self.op.force:
13815 raise errors.OpExecError("The following instances get split by this"
13816 " change and --force was not given: %s" %
13817 fmt_new_splits)
13818 else:
13819 self.LogWarning("This operation will split the following instances: %s",
13820 fmt_new_splits)
13822 if previous_splits:
13823 self.LogWarning("In addition, these already-split instances continue"
13824 " to be split across groups: %s",
13825 utils.CommaJoin(utils.NiceSort(previous_splits)))
13827 def Exec(self, feedback_fn):
13828 """Assign nodes to a new group.
13831 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13833 self.cfg.AssignGroupNodes(mods)
13836 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13837 """Check for split instances after a node assignment.
13839 This method considers a series of node assignments as an atomic operation,
13840 and returns information about split instances after applying the set of
13843 In particular, it returns information about newly split instances, and
13844 instances that were already split, and remain so after the change.
13846 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13849 @type changes: list of (node_name, new_group_uuid) pairs.
13850 @param changes: list of node assignments to consider.
13851 @param node_data: a dict with data for all nodes
13852 @param instance_data: a dict with all instances to consider
13853 @rtype: a two-tuple
13854 @return: a list of instances that were previously okay and end up split as a
13855 consequence of this change, and a list of instances that were previously
13856 split and remain split after the change.
13859 changed_nodes = dict((node, group) for node, group in changes
13860 if node_data[node].group != group)
13862 all_split_instances = set()
13863 previously_split_instances = set()
13865 def InstanceNodes(instance):
13866 return [instance.primary_node] + list(instance.secondary_nodes)
13868 for inst in instance_data.values():
13869 if inst.disk_template not in constants.DTS_INT_MIRROR:
13872 instance_nodes = InstanceNodes(inst)
13874 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13875 previously_split_instances.add(inst.name)
13877 if len(set(changed_nodes.get(node, node_data[node].group)
13878 for node in instance_nodes)) > 1:
13879 all_split_instances.add(inst.name)
13881 return (list(all_split_instances - previously_split_instances),
13882 list(previously_split_instances & all_split_instances))
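# Hypothetical example: with nodes n1 and n2 in group G1 and n3 in group G2,
# a DRBD instance on (n1, n2) is currently not split; changes=[("n2", G2)]
# would report it as newly split, while an instance already spanning (n1, n3)
# would be reported as previously split and still split after the change.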
13885 class _GroupQuery(_QueryBase):
13886 FIELDS = query.GROUP_FIELDS
13888 def ExpandNames(self, lu):
13889 lu.needed_locks = {}
13891 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13892 self._cluster = lu.cfg.GetClusterInfo()
13893 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13895 if not self.names:
13896 self.wanted = [name_to_uuid[name]
13897 for name in utils.NiceSort(name_to_uuid.keys())]
13898 else:
13899 # Accept names to be either names or UUIDs.
13900 missing = []
13901 self.wanted = []
13902 all_uuid = frozenset(self._all_groups.keys())
13904 for name in self.names:
13905 if name in all_uuid:
13906 self.wanted.append(name)
13907 elif name in name_to_uuid:
13908 self.wanted.append(name_to_uuid[name])
13909 else:
13910 missing.append(name)
13912 if missing:
13913 raise errors.OpPrereqError("Some groups do not exist: %s" %
13914 utils.CommaJoin(missing),
13915 errors.ECODE_NOENT)
13917 def DeclareLocks(self, lu, level):
13920 def _GetQueryData(self, lu):
13921 """Computes the list of node groups and their attributes.
13924 do_nodes = query.GQ_NODE in self.requested_data
13925 do_instances = query.GQ_INST in self.requested_data
13927 group_to_nodes = None
13928 group_to_instances = None
13930 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13931 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13932 # latter GetAllInstancesInfo() is not enough, for we have to go through
13933 # instance->node. Hence, we will need to process nodes even if we only need
13934 # instance information.
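# Hypothetical example: a node "n1" in the group with UUID "g1" that is the
# primary of instance "inst1" yields group_to_nodes == {"g1": ["n1"]} and
# group_to_instances == {"g1": ["inst1"]}.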
13935 if do_nodes or do_instances:
13936 all_nodes = lu.cfg.GetAllNodesInfo()
13937 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13938 node_to_group = {}
13940 for node in all_nodes.values():
13941 if node.group in group_to_nodes:
13942 group_to_nodes[node.group].append(node.name)
13943 node_to_group[node.name] = node.group
13945 if do_instances:
13946 all_instances = lu.cfg.GetAllInstancesInfo()
13947 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13949 for instance in all_instances.values():
13950 node = instance.primary_node
13951 if node in node_to_group:
13952 group_to_instances[node_to_group[node]].append(instance.name)
13954 if not do_nodes:
13955 # Do not pass on node information if it was not requested.
13956 group_to_nodes = None
13958 return query.GroupQueryData(self._cluster,
13959 [self._all_groups[uuid]
13960 for uuid in self.wanted],
13961 group_to_nodes, group_to_instances,
13962 query.GQ_DISKPARAMS in self.requested_data)
13965 class LUGroupQuery(NoHooksLU):
13966 """Logical unit for querying node groups.
13971 def CheckArguments(self):
13972 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13973 self.op.output_fields, False)
13975 def ExpandNames(self):
13976 self.gq.ExpandNames(self)
13978 def DeclareLocks(self, level):
13979 self.gq.DeclareLocks(self, level)
13981 def Exec(self, feedback_fn):
13982 return self.gq.OldStyleQuery(self)
13985 class LUGroupSetParams(LogicalUnit):
13986 """Modifies the parameters of a node group.
13989 HPATH = "group-modify"
13990 HTYPE = constants.HTYPE_GROUP
13993 def CheckArguments(self):
13994 all_changes = [
13995 self.op.ndparams,
13996 self.op.diskparams,
13997 self.op.alloc_policy,
13998 self.op.hv_state,
13999 self.op.disk_state,
14000 self.op.ipolicy,
14001 ]
14003 if all_changes.count(None) == len(all_changes):
14004 raise errors.OpPrereqError("Please pass at least one modification",
14005 errors.ECODE_INVAL)
14007 def ExpandNames(self):
14008 # This raises errors.OpPrereqError on its own:
14009 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14011 self.needed_locks = {
14012 locking.LEVEL_INSTANCE: [],
14013 locking.LEVEL_NODEGROUP: [self.group_uuid],
14016 self.share_locks[locking.LEVEL_INSTANCE] = 1
14018 def DeclareLocks(self, level):
14019 if level == locking.LEVEL_INSTANCE:
14020 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14022 # Lock instances optimistically, needs verification once group lock has
14024 self.needed_locks[locking.LEVEL_INSTANCE] = \
14025 self.cfg.GetNodeGroupInstances(self.group_uuid)
14028 def _UpdateAndVerifyDiskParams(old, new):
14029 """Updates and verifies disk parameters.
14032 new_params = _GetUpdatedParams(old, new)
14033 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14036 def CheckPrereq(self):
14037 """Check prerequisites.
14040 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14042 # Check if locked instances are still correct
14043 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14045 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14046 cluster = self.cfg.GetClusterInfo()
14048 if self.group is None:
14049 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14050 (self.op.group_name, self.group_uuid))
14052 if self.op.ndparams:
14053 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14054 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14055 self.new_ndparams = new_ndparams
14057 if self.op.diskparams:
14058 diskparams = self.group.diskparams
14059 uavdp = self._UpdateAndVerifyDiskParams
14060 # For each disktemplate subdict update and verify the values
14061 new_diskparams = dict((dt,
14062 uavdp(diskparams.get(dt, {}),
14063 self.op.diskparams[dt]))
14064 for dt in constants.DISK_TEMPLATES
14065 if dt in self.op.diskparams)
14066 # As we've all subdicts of diskparams ready, lets merge the actual
14067 # dict with all updated subdicts
14068 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14069 try:
14070 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14071 except errors.OpPrereqError, err:
14072 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14073 errors.ECODE_INVAL)
14075 if self.op.hv_state:
14076 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14077 self.group.hv_state_static)
14079 if self.op.disk_state:
14080 self.new_disk_state = \
14081 _MergeAndVerifyDiskState(self.op.disk_state,
14082 self.group.disk_state_static)
14084 if self.op.ipolicy:
14085 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14089 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14090 inst_filter = lambda inst: inst.name in owned_instances
14091 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14093 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14095 new_ipolicy, instances)
14097 if violations:
14098 self.LogWarning("After the ipolicy change the following instances"
14099 " violate them: %s",
14100 utils.CommaJoin(violations))
14102 def BuildHooksEnv(self):
14103 """Build hooks env.
14107 "GROUP_NAME": self.op.group_name,
14108 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14111 def BuildHooksNodes(self):
14112 """Build hooks nodes.
14115 mn = self.cfg.GetMasterNode()
14116 return ([mn], [mn])
14118 def Exec(self, feedback_fn):
14119 """Modifies the node group.
14124 if self.op.ndparams:
14125 self.group.ndparams = self.new_ndparams
14126 result.append(("ndparams", str(self.group.ndparams)))
14128 if self.op.diskparams:
14129 self.group.diskparams = self.new_diskparams
14130 result.append(("diskparams", str(self.group.diskparams)))
14132 if self.op.alloc_policy:
14133 self.group.alloc_policy = self.op.alloc_policy
14135 if self.op.hv_state:
14136 self.group.hv_state_static = self.new_hv_state
14138 if self.op.disk_state:
14139 self.group.disk_state_static = self.new_disk_state
14141 if self.op.ipolicy:
14142 self.group.ipolicy = self.new_ipolicy
14144 self.cfg.Update(self.group, feedback_fn)
14148 class LUGroupRemove(LogicalUnit):
14149 HPATH = "group-remove"
14150 HTYPE = constants.HTYPE_GROUP
14153 def ExpandNames(self):
14154 # This raises errors.OpPrereqError on its own:
14155 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14156 self.needed_locks = {
14157 locking.LEVEL_NODEGROUP: [self.group_uuid],
14160 def CheckPrereq(self):
14161 """Check prerequisites.
14163 This checks that the given group name exists as a node group, that is
14164 empty (i.e., contains no nodes), and that is not the last group of the
14168 # Verify that the group is empty.
14169 group_nodes = [node.name
14170 for node in self.cfg.GetAllNodesInfo().values()
14171 if node.group == self.group_uuid]
14173 if group_nodes:
14174 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14175 " nodes: %s" %
14176 (self.op.group_name,
14177 utils.CommaJoin(utils.NiceSort(group_nodes))),
14178 errors.ECODE_STATE)
14180 # Verify the cluster would not be left group-less.
14181 if len(self.cfg.GetNodeGroupList()) == 1:
14182 raise errors.OpPrereqError("Group '%s' is the only group,"
14183 " cannot be removed" %
14184 self.op.group_name,
14185 errors.ECODE_STATE)
14187 def BuildHooksEnv(self):
14188 """Build hooks env.
14192 "GROUP_NAME": self.op.group_name,
14195 def BuildHooksNodes(self):
14196 """Build hooks nodes.
14199 mn = self.cfg.GetMasterNode()
14200 return ([mn], [mn])
14202 def Exec(self, feedback_fn):
14203 """Remove the node group.
14207 self.cfg.RemoveNodeGroup(self.group_uuid)
14208 except errors.ConfigurationError:
14209 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14210 (self.op.group_name, self.group_uuid))
14212 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14215 class LUGroupRename(LogicalUnit):
14216 HPATH = "group-rename"
14217 HTYPE = constants.HTYPE_GROUP
14220 def ExpandNames(self):
14221 # This raises errors.OpPrereqError on its own:
14222 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14224 self.needed_locks = {
14225 locking.LEVEL_NODEGROUP: [self.group_uuid],
14228 def CheckPrereq(self):
14229 """Check prerequisites.
14231 Ensures requested new name is not yet used.
14234 try:
14235 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14236 except errors.OpPrereqError:
14237 pass
14238 else:
14239 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14240 " node group (UUID: %s)" %
14241 (self.op.new_name, new_name_uuid),
14242 errors.ECODE_EXISTS)
14244 def BuildHooksEnv(self):
14245 """Build hooks env.
14249 "OLD_NAME": self.op.group_name,
14250 "NEW_NAME": self.op.new_name,
14253 def BuildHooksNodes(self):
14254 """Build hooks nodes.
14257 mn = self.cfg.GetMasterNode()
14259 all_nodes = self.cfg.GetAllNodesInfo()
14260 all_nodes.pop(mn, None)
14262 run_nodes = [mn]
14263 run_nodes.extend(node.name for node in all_nodes.values()
14264 if node.group == self.group_uuid)
14266 return (run_nodes, run_nodes)
14268 def Exec(self, feedback_fn):
14269 """Rename the node group.
14272 group = self.cfg.GetNodeGroup(self.group_uuid)
14274 if group is None:
14275 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14276 (self.op.group_name, self.group_uuid))
14278 group.name = self.op.new_name
14279 self.cfg.Update(group, feedback_fn)
14281 return self.op.new_name
14284 class LUGroupEvacuate(LogicalUnit):
14285 HPATH = "group-evacuate"
14286 HTYPE = constants.HTYPE_GROUP
14289 def ExpandNames(self):
14290 # This raises errors.OpPrereqError on its own:
14291 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14293 if self.op.target_groups:
14294 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14295 self.op.target_groups)
14297 self.req_target_uuids = []
14299 if self.group_uuid in self.req_target_uuids:
14300 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14301 " as a target group (targets are %s)" %
14303 utils.CommaJoin(self.req_target_uuids)),
14304 errors.ECODE_INVAL)
14306 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14308 self.share_locks = _ShareAll()
14309 self.needed_locks = {
14310 locking.LEVEL_INSTANCE: [],
14311 locking.LEVEL_NODEGROUP: [],
14312 locking.LEVEL_NODE: [],
14315 def DeclareLocks(self, level):
14316 if level == locking.LEVEL_INSTANCE:
14317 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14319 # Lock instances optimistically, needs verification once node and group
14320 # locks have been acquired
14321 self.needed_locks[locking.LEVEL_INSTANCE] = \
14322 self.cfg.GetNodeGroupInstances(self.group_uuid)
14324 elif level == locking.LEVEL_NODEGROUP:
14325 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14327 if self.req_target_uuids:
14328 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14330 # Lock all groups used by instances optimistically; this requires going
14331 # via the node before it's locked, requiring verification later on
14332 lock_groups.update(group_uuid
14333 for instance_name in
14334 self.owned_locks(locking.LEVEL_INSTANCE)
14336 self.cfg.GetInstanceNodeGroups(instance_name))
14338 # No target groups, need to lock all of them
14339 lock_groups = locking.ALL_SET
14341 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14343 elif level == locking.LEVEL_NODE:
14344 # This will only lock the nodes in the group to be evacuated which
14345 # contain actual instances
14346 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14347 self._LockInstancesNodes()
14349 # Lock all nodes in group to be evacuated and target groups
14350 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14351 assert self.group_uuid in owned_groups
14352 member_nodes = [node_name
14353 for group in owned_groups
14354 for node_name in self.cfg.GetNodeGroup(group).members]
14355 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14357 def CheckPrereq(self):
14358 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14359 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14360 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14362 assert owned_groups.issuperset(self.req_target_uuids)
14363 assert self.group_uuid in owned_groups
14365 # Check if locked instances are still correct
14366 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14368 # Get instance information
14369 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14371 # Check if node groups for locked instances are still correct
14372 _CheckInstancesNodeGroups(self.cfg, self.instances,
14373 owned_groups, owned_nodes, self.group_uuid)
14375 if self.req_target_uuids:
14376 # User requested specific target groups
14377 self.target_uuids = self.req_target_uuids
14379 # All groups except the one to be evacuated are potential targets
14380 self.target_uuids = [group_uuid for group_uuid in owned_groups
14381 if group_uuid != self.group_uuid]
14383 if not self.target_uuids:
14384 raise errors.OpPrereqError("There are no possible target groups",
14385 errors.ECODE_INVAL)
14387 def BuildHooksEnv(self):
14388 """Build hooks env.
14392 "GROUP_NAME": self.op.group_name,
14393 "TARGET_GROUPS": " ".join(self.target_uuids),
14396 def BuildHooksNodes(self):
14397 """Build hooks nodes.
14400 mn = self.cfg.GetMasterNode()
14402 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14404 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14406 return (run_nodes, run_nodes)
14408 def Exec(self, feedback_fn):
14409 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14411 assert self.group_uuid not in self.target_uuids
14413 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14414 instances=instances, target_groups=self.target_uuids)
14416 ial.Run(self.op.iallocator)
14418 if not ial.success:
14419 raise errors.OpPrereqError("Can't compute group evacuation using"
14420 " iallocator '%s': %s" %
14421 (self.op.iallocator, ial.info),
14422 errors.ECODE_NORES)
14424 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14426 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14427 len(jobs), self.op.group_name)
14429 return ResultWithJobs(jobs)
14432 class TagsLU(NoHooksLU): # pylint: disable=W0223
14433 """Generic tags LU.
14435 This is an abstract class which is the parent of all the other tags LUs.
14438 def ExpandNames(self):
14439 self.group_uuid = None
14440 self.needed_locks = {}
14442 if self.op.kind == constants.TAG_NODE:
14443 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14444 lock_level = locking.LEVEL_NODE
14445 lock_name = self.op.name
14446 elif self.op.kind == constants.TAG_INSTANCE:
14447 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14448 lock_level = locking.LEVEL_INSTANCE
14449 lock_name = self.op.name
14450 elif self.op.kind == constants.TAG_NODEGROUP:
14451 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14452 lock_level = locking.LEVEL_NODEGROUP
14453 lock_name = self.group_uuid
14458 if lock_level and getattr(self.op, "use_locking", True):
14459 self.needed_locks[lock_level] = lock_name
14461 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14462 # not possible to acquire the BGL based on opcode parameters)
14464 def CheckPrereq(self):
14465 """Check prerequisites.
14468 if self.op.kind == constants.TAG_CLUSTER:
14469 self.target = self.cfg.GetClusterInfo()
14470 elif self.op.kind == constants.TAG_NODE:
14471 self.target = self.cfg.GetNodeInfo(self.op.name)
14472 elif self.op.kind == constants.TAG_INSTANCE:
14473 self.target = self.cfg.GetInstanceInfo(self.op.name)
14474 elif self.op.kind == constants.TAG_NODEGROUP:
14475 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14477 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14478 str(self.op.kind), errors.ECODE_INVAL)
14481 class LUTagsGet(TagsLU):
14482 """Returns the tags of a given object.
14487 def ExpandNames(self):
14488 TagsLU.ExpandNames(self)
14490 # Share locks as this is only a read operation
14491 self.share_locks = _ShareAll()
14493 def Exec(self, feedback_fn):
14494 """Returns the tag list.
14497 return list(self.target.GetTags())
14500 class LUTagsSearch(NoHooksLU):
14501 """Searches the tags for a given pattern.
14506 def ExpandNames(self):
14507 self.needed_locks = {}
14509 def CheckPrereq(self):
14510 """Check prerequisites.
14512 This checks the pattern passed for validity by compiling it.
14516 self.re = re.compile(self.op.pattern)
14517 except re.error, err:
14518 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14519 (self.op.pattern, err), errors.ECODE_INVAL)
14521 def Exec(self, feedback_fn):
14522 """Returns the tag list.
14525 cfg = self.cfg
14526 tgts = [("/cluster", cfg.GetClusterInfo())]
14527 ilist = cfg.GetAllInstancesInfo().values()
14528 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14529 nlist = cfg.GetAllNodesInfo().values()
14530 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14531 tgts.extend(("/nodegroup/%s" % n.name, n)
14532 for n in cfg.GetAllNodeGroupsInfo().values())
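# Hypothetical example: searching for the pattern "^env:" could return
# [("/cluster", "env:prod"), ("/instances/web1.example.com", "env:prod")].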
14533 results = []
14534 for path, target in tgts:
14535 for tag in target.GetTags():
14536 if self.re.search(tag):
14537 results.append((path, tag))
14539 return results
14541 class LUTagsSet(TagsLU):
14542 """Sets a tag on a given object.
14547 def CheckPrereq(self):
14548 """Check prerequisites.
14550 This checks the type and length of the tag name and value.
14553 TagsLU.CheckPrereq(self)
14554 for tag in self.op.tags:
14555 objects.TaggableObject.ValidateTag(tag)
14557 def Exec(self, feedback_fn):
14562 for tag in self.op.tags:
14563 self.target.AddTag(tag)
14564 except errors.TagError, err:
14565 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14566 self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
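  # Illustrative sketch (not part of this module), assuming a test client
  # that pairs with _NotifyUsingSocket above: the callback publishes the
  # socket path, the client connects within _CLIENT_CONNECT_TIMEOUT and later
  # closes the connection to confirm the notification:
  #
  #   client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   client.connect(sockname)   # unblocks sock.accept() above
  #   ...                        # inspect the job/opcode under test
  #   client.close()             # unblocks conn.recv(1) above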

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = self.spindle_use = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)
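  # Illustrative sketch (not part of the original code): for the allocation
  # mode every key declared in _MODE_DATA must be passed as a keyword
  # argument, e.g. (values are hypothetical):
  #
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_ALLOC,
  #                    name="inst1.example.com", memory=1024, vcpus=1,
  #                    os="debootstrap+default", tags=[], nics=[{}],
  #                    disks=[{constants.IDISK_SIZE: 10240,
  #                            constants.IDISK_MODE: constants.DISK_RDWR}],
  #                    disk_template=constants.DT_DRBD8, spindle_use=1,
  #                    hypervisor=constants.HT_XEN_PVM)
  #
  # Omitting a declared key, or passing an undeclared one, raises
  # ProgrammerError as enforced above.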

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      "ipolicy": cluster_info.ipolicy,
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.primary_hypervisor

    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
                                        [hypervisor_name])
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    cluster = cfg.GetClusterInfo()
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng
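  # Illustrative sketch (not from the original code): the mapping returned
  # above is keyed by group UUID, e.g. (hypothetical values):
  #
  #   {"f4e06e0f-...": {"name": "default",
  #                     "alloc_policy": constants.ALLOC_POLICY_PREFERRED,
  #                     "ipolicy": {...}}}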

  @staticmethod
  def _ComputeBasicNodeData(cfg, node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      "ndparams": cfg.GetNdParams(ninfo),
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
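  # Illustrative sketch (not from the original code): for an online node the
  # resulting entry merges the dynamic values computed above with the static
  # config data from _ComputeBasicNodeData, e.g. (hypothetical numbers, MiB
  # for memory and volume-group space):
  #
  #   "node1.example.com": {"total_memory": 16384, "reserved_memory": 1024,
  #                         "free_memory": 8192, "total_disk": 512000,
  #                         "free_disk": 256000, "total_cpus": 8,
  #                         "i_pri_memory": 6144, "i_pri_up_memory": 4096,
  #                         "tags": [], "offline": False, ...}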

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "spindle_use": beinfo[constants.BE_SPINDLE_USE],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
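  # Illustrative sketch (not from the original code): one entry of the mapping
  # built above, keyed by instance name (all values hypothetical; the exact
  # disk_space_total depends on _ComputeDiskSize):
  #
  #   "inst1.example.com": {"admin_state": constants.ADMINST_UP, "vcpus": 2,
  #                         "memory": 2048, "os": "debootstrap+default",
  #                         "nodes": ["node1.example.com",
  #                                   "node2.example.com"],
  #                         "nics": [...],
  #                         "disks": [{"size": 10240, "mode": "rw"}],
  #                         "disk_template": "drbd", "hypervisor": "xen-pvm",
  #                         "tags": [], "spindle_use": 1,
  #                         "disk_space_total": ...}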

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "spindle_use": self.spindle_use,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
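  # Illustrative sketch (not from the original code): after _BuildInputData
  # runs, self.in_data has the structure serialized into self.in_text and
  # handed to the external script, roughly:
  #
  #   {"version": constants.IALLOCATOR_VERSION,
  #    "cluster_name": ..., "cluster_tags": [...], "ipolicy": {...},
  #    "enabled_hypervisors": [...],
  #    "nodegroups": {...}, "nodes": {...}, "instances": {...},
  #    "request": {"type": constants.IALLOCATOR_MODE_ALLOC,
  #                "name": "inst1.example.com", ...}}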

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance, [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("spindle_use", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
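  # Illustrative usage sketch (not part of the original code), assuming the
  # standard "hail" allocator is installed; names are hypothetical:
  #
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
  #                    name="inst1.example.com",
  #                    relocate_from=["node2.example.com"])
  #   ial.Run("hail")
  #   if not ial.success:
  #     raise errors.OpExecError("Allocator failed: %s" % ial.info)
  #   new_nodes = ial.result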

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
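  # Illustrative sketch (not from the original code), with hypothetical data:
  #
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
  #   groups = {"uuid-a": {"name": "default"}}   # "uuid-b" is unknown
  #   IAllocator._NodesToGroups(node2group, groups,
  #                             ["node1", "node2", "nodeX"])
  #
  # returns sorted names, falling back to the UUID for unknown groups and
  # silently skipping unknown nodes: ["default", "uuid-b"].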


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
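# Illustrative sketch (not from the original code): callers look up the query
# implementation by resource name and instantiate it, e.g. a hypothetical
# node query:
#
#   impl_cls = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   # unknown resource names raise OpPrereqError with ECODE_INVAL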