4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensure
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separate is better because:
176 - ExpandNames is left as as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods can no longer worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. No nodes should be returned as an
318 empty list (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the unused argument and could
345 # be a function warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383 If should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we're really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of its children!)
608 @param cfg: The config object
609 @returns The annotated disk copies
610 @see L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance.
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type instance_name: string
649 @param instance_name: Instance name
650 @type owned_groups: set or frozenset
651 @param owned_groups: List of currently owned node groups
654 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
656 if not owned_groups.issuperset(inst_groups):
657 raise errors.OpPrereqError("Instance %s's node groups changed since"
658 " locks were acquired, current groups are"
659 " are '%s', owning groups '%s'; retry the"
662 utils.CommaJoin(inst_groups),
663 utils.CommaJoin(owned_groups)),
669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct.
672 @type cfg: L{config.ConfigWriter}
673 @param cfg: The cluster configuration
674 @type group_uuid: string
675 @param group_uuid: Node group UUID
676 @type owned_instances: set or frozenset
677 @param owned_instances: List of currently owned instances
680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
681 if owned_instances != wanted_instances:
682 raise errors.OpPrereqError("Instances in node group '%s' changed since"
683 " locks were acquired, wanted '%s', have '%s';"
684 " retry the operation" %
686 utils.CommaJoin(wanted_instances),
687 utils.CommaJoin(owned_instances)),
690 return wanted_instances
693 def _SupportsOob(cfg, node):
694 """Tells if node supports OOB.
696 @type cfg: L{config.ConfigWriter}
697 @param cfg: The cluster configuration
698 @type node: L{objects.Node}
699 @param node: The node
700 @return: The OOB script if supported or an empty string otherwise
703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
706 def _GetWantedNodes(lu, nodes):
707 """Returns list of checked and expanded node names.
709 @type lu: L{LogicalUnit}
710 @param lu: the logical unit on whose behalf we execute
712 @param nodes: list of node names or None for all nodes
714 @return: the list of nodes, sorted
715 @raise errors.ProgrammerError: if the nodes parameter is wrong type
719 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
721 return utils.NiceSort(lu.cfg.GetNodeList())
724 def _GetWantedInstances(lu, instances):
725 """Returns list of checked and expanded instance names.
727 @type lu: L{LogicalUnit}
728 @param lu: the logical unit on whose behalf we execute
729 @type instances: list
730 @param instances: list of instance names or None for all instances
732 @return: the list of instances, sorted
733 @raise errors.OpPrereqError: if the instances parameter is wrong type
734 @raise errors.OpPrereqError: if any of the passed instances is not found
738 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
740 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
744 def _GetUpdatedParams(old_params, update_dict,
745 use_default=True, use_none=False):
746 """Return the new version of a parameter dictionary.
748 @type old_params: dict
749 @param old_params: old parameters
750 @type update_dict: dict
751 @param update_dict: dict containing new parameter values, or
752 constants.VALUE_DEFAULT to reset the parameter to its default
754 @param use_default: boolean
755 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
756 values as 'to be deleted' values
757 @param use_none: boolean
758 @type use_none: whether to recognise C{None} values as 'to be
761 @return: the new parameter dictionary
764 params_copy = copy.deepcopy(old_params)
765 for key, val in update_dict.iteritems():
766 if ((use_default and val == constants.VALUE_DEFAULT) or
767 (use_none and val is None)):
773 params_copy[key] = val
777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
778 """Return the new version of a instance policy.
780 @param group_policy: whether this policy applies to a group and thus
781 we should support removal of policy entries
784 use_none = use_default = group_policy
785 ipolicy = copy.deepcopy(old_ipolicy)
786 for key, value in new_ipolicy.items():
787 if key not in constants.IPOLICY_ALL_KEYS:
788 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
790 if key in constants.IPOLICY_ISPECS:
791 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
792 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
794 use_default=use_default)
796 if (not value or value == [constants.VALUE_DEFAULT] or
797 value == constants.VALUE_DEFAULT):
801 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
802 " on the cluster'" % key,
805 if key in constants.IPOLICY_PARAMETERS:
806 # FIXME: we assume all such values are float
808 ipolicy[key] = float(value)
809 except (TypeError, ValueError), err:
810 raise errors.OpPrereqError("Invalid value for attribute"
811 " '%s': '%s', error: %s" %
812 (key, value, err), errors.ECODE_INVAL)
814 # FIXME: we assume all others are lists; this should be redone
816 ipolicy[key] = list(value)
818 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
819 except errors.ConfigurationError, err:
820 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
825 def _UpdateAndVerifySubDict(base, updates, type_check):
826 """Updates and verifies a dict with sub dicts of the same type.
828 @param base: The dict with the old data
829 @param updates: The dict with the new data
830 @param type_check: Dict suitable to ForceDictType to verify correct types
831 @returns: A new dict with updated and verified values
835 new = _GetUpdatedParams(old, value)
836 utils.ForceDictType(new, type_check)
839 ret = copy.deepcopy(base)
840 ret.update(dict((key, fn(base.get(key, {}), value))
841 for key, value in updates.items()))
845 def _MergeAndVerifyHvState(op_input, obj_input):
846 """Combines the hv state from an opcode with the one of the object
848 @param op_input: The input dict from the opcode
849 @param obj_input: The input dict from the objects
850 @return: The verified and updated dict
854 invalid_hvs = set(op_input) - constants.HYPER_TYPES
856 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
857 " %s" % utils.CommaJoin(invalid_hvs),
859 if obj_input is None:
861 type_check = constants.HVSTS_PARAMETER_TYPES
862 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
867 def _MergeAndVerifyDiskState(op_input, obj_input):
868 """Combines the disk state from an opcode with the one of the object
870 @param op_input: The input dict from the opcode
871 @param obj_input: The input dict from the objects
872 @return: The verified and updated dict
875 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
877 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
878 utils.CommaJoin(invalid_dst),
880 type_check = constants.DSS_PARAMETER_TYPES
881 if obj_input is None:
883 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
885 for key, value in op_input.items())
890 def _ReleaseLocks(lu, level, names=None, keep=None):
891 """Releases locks owned by an LU.
893 @type lu: L{LogicalUnit}
894 @param level: Lock level
895 @type names: list or None
896 @param names: Names of locks to release
897 @type keep: list or None
898 @param keep: Names of locks to retain
901 assert not (keep is not None and names is not None), \
902 "Only one of the 'names' and the 'keep' parameters can be given"
904 if names is not None:
905 should_release = names.__contains__
907 should_release = lambda name: name not in keep
909 should_release = None
911 owned = lu.owned_locks(level)
913 # Not owning any lock at this level, do nothing
920 # Determine which locks to release
922 if should_release(name):
927 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
929 # Release just some locks
930 lu.glm.release(level, names=release)
932 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
935 lu.glm.release(level)
937 assert not lu.glm.is_owned(level), "No locks should be owned"
940 def _MapInstanceDisksToNodes(instances):
941 """Creates a map from (node, volume) to instance name.
943 @type instances: list of L{objects.Instance}
944 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
947 return dict(((node, vol), inst.name)
948 for inst in instances
949 for (node, vols) in inst.MapLVsByNode().items()
953 def _RunPostHook(lu, node_name):
954 """Runs the post-hook for an opcode on a single node.
957 hm = lu.proc.BuildHooksManager(lu)
959 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
961 # pylint: disable=W0702
962 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
965 def _CheckOutputFields(static, dynamic, selected):
966 """Checks whether all selected fields are valid.
968 @type static: L{utils.FieldSet}
969 @param static: static fields set
970 @type dynamic: L{utils.FieldSet}
971 @param dynamic: dynamic fields set
978 delta = f.NonMatching(selected)
980 raise errors.OpPrereqError("Unknown output fields selected: %s"
981 % ",".join(delta), errors.ECODE_INVAL)
984 def _CheckGlobalHvParams(params):
985 """Validates that given hypervisor params are not global ones.
987 This will ensure that instances don't get customised versions of
991 used_globals = constants.HVC_GLOBALS.intersection(params)
993 msg = ("The following hypervisor parameters are global and cannot"
994 " be customized at instance level, please modify them at"
995 " cluster level: %s" % utils.CommaJoin(used_globals))
996 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
999 def _CheckNodeOnline(lu, node, msg=None):
1000 """Ensure that a given node is online.
1002 @param lu: the LU on behalf of which we make the check
1003 @param node: the node to check
1004 @param msg: if passed, should be a message to replace the default one
1005 @raise errors.OpPrereqError: if the node is offline
1009 msg = "Can't use offline node"
1010 if lu.cfg.GetNodeInfo(node).offline:
1011 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1014 def _CheckNodeNotDrained(lu, node):
1015 """Ensure that a given node is not drained.
1017 @param lu: the LU on behalf of which we make the check
1018 @param node: the node to check
1019 @raise errors.OpPrereqError: if the node is drained
1022 if lu.cfg.GetNodeInfo(node).drained:
1023 raise errors.OpPrereqError("Can't use drained node %s" % node,
1027 def _CheckNodeVmCapable(lu, node):
1028 """Ensure that a given node is vm capable.
1030 @param lu: the LU on behalf of which we make the check
1031 @param node: the node to check
1032 @raise errors.OpPrereqError: if the node is not vm capable
1035 if not lu.cfg.GetNodeInfo(node).vm_capable:
1036 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1040 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1041 """Ensure that a node supports a given OS.
1043 @param lu: the LU on behalf of which we make the check
1044 @param node: the node to check
1045 @param os_name: the OS to query about
1046 @param force_variant: whether to ignore variant errors
1047 @raise errors.OpPrereqError: if the node is not supporting the OS
1050 result = lu.rpc.call_os_get(node, os_name)
1051 result.Raise("OS '%s' not in supported OS list for node %s" %
1053 prereq=True, ecode=errors.ECODE_INVAL)
1054 if not force_variant:
1055 _CheckOSVariant(result.payload, os_name)
1058 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1059 """Ensure that a node has the given secondary ip.
1061 @type lu: L{LogicalUnit}
1062 @param lu: the LU on behalf of which we make the check
1064 @param node: the node to check
1065 @type secondary_ip: string
1066 @param secondary_ip: the ip to check
1067 @type prereq: boolean
1068 @param prereq: whether to throw a prerequisite or an execute error
1069 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1070 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1073 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1074 result.Raise("Failure checking secondary ip on node %s" % node,
1075 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1076 if not result.payload:
1077 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1078 " please fix and re-run this command" % secondary_ip)
1080 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1082 raise errors.OpExecError(msg)
1085 def _GetClusterDomainSecret():
1086 """Reads the cluster domain secret.
1089 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1093 def _CheckInstanceState(lu, instance, req_states, msg=None):
1094 """Ensure that an instance is in one of the required states.
1096 @param lu: the LU on behalf of which we make the check
1097 @param instance: the instance to check
1098 @param msg: if passed, should be a message to replace the default one
1099 @raise errors.OpPrereqError: if the instance is not in the required state
1103 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1104 if instance.admin_state not in req_states:
1105 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1106 (instance.name, instance.admin_state, msg),
1109 if constants.ADMINST_UP not in req_states:
1110 pnode = instance.primary_node
1111 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1112 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1113 prereq=True, ecode=errors.ECODE_ENVIRON)
1115 if instance.name in ins_l.payload:
1116 raise errors.OpPrereqError("Instance %s is running, %s" %
1117 (instance.name, msg), errors.ECODE_STATE)
1120 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1121 """Computes if value is in the desired range.
1123 @param name: name of the parameter for which we perform the check
1124 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1126 @param ipolicy: dictionary containing min, max and std values
1127 @param value: actual value that we want to use
1128 @return: None or element not meeting the criteria
1132 if value in [None, constants.VALUE_AUTO]:
1134 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1135 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1136 if value > max_v or min_v > value:
1138 fqn = "%s/%s" % (name, qualifier)
1141 return ("%s value %s is not in range [%s, %s]" %
1142 (fqn, value, min_v, max_v))
1146 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1147 nic_count, disk_sizes, spindle_use,
1148 _compute_fn=_ComputeMinMaxSpec):
1149 """Verifies ipolicy against provided specs.
1152 @param ipolicy: The ipolicy
1154 @param mem_size: The memory size
1155 @type cpu_count: int
1156 @param cpu_count: Used cpu cores
1157 @type disk_count: int
1158 @param disk_count: Number of disks used
1159 @type nic_count: int
1160 @param nic_count: Number of nics used
1161 @type disk_sizes: list of ints
1162 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1163 @type spindle_use: int
1164 @param spindle_use: The number of spindles this instance uses
1165 @param _compute_fn: The compute function (unittest only)
1166 @return: A list of violations, or an empty list of no violations are found
1169 assert disk_count == len(disk_sizes)
1172 (constants.ISPEC_MEM_SIZE, "", mem_size),
1173 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1174 (constants.ISPEC_DISK_COUNT, "", disk_count),
1175 (constants.ISPEC_NIC_COUNT, "", nic_count),
1176 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1177 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1178 for idx, d in enumerate(disk_sizes)]
1181 (_compute_fn(name, qualifier, ipolicy, value)
1182 for (name, qualifier, value) in test_settings))
1185 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1186 _compute_fn=_ComputeIPolicySpecViolation):
1187 """Compute if instance meets the specs of ipolicy.
1190 @param ipolicy: The ipolicy to verify against
1191 @type instance: L{objects.Instance}
1192 @param instance: The instance to verify
1193 @param _compute_fn: The function to verify ipolicy (unittest only)
1194 @see: L{_ComputeIPolicySpecViolation}
1197 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1198 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1199 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1200 disk_count = len(instance.disks)
1201 disk_sizes = [disk.size for disk in instance.disks]
1202 nic_count = len(instance.nics)
1204 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1205 disk_sizes, spindle_use)
1208 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1209 _compute_fn=_ComputeIPolicySpecViolation):
1210 """Compute if instance specs meets the specs of ipolicy.
1213 @param ipolicy: The ipolicy to verify against
1214 @param instance_spec: dict
1215 @param instance_spec: The instance spec to verify
1216 @param _compute_fn: The function to verify ipolicy (unittest only)
1217 @see: L{_ComputeIPolicySpecViolation}
1220 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1221 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1222 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1223 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1224 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1225 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1227 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1228 disk_sizes, spindle_use)
1231 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1233 _compute_fn=_ComputeIPolicyInstanceViolation):
1234 """Compute if instance meets the specs of the new target group.
1236 @param ipolicy: The ipolicy to verify
1237 @param instance: The instance object to verify
1238 @param current_group: The current group of the instance
1239 @param target_group: The new group of the instance
1240 @param _compute_fn: The function to verify ipolicy (unittest only)
1241 @see: L{_ComputeIPolicySpecViolation}
1244 if current_group == target_group:
1247 return _compute_fn(ipolicy, instance)
1250 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1251 _compute_fn=_ComputeIPolicyNodeViolation):
1252 """Checks that the target node is correct in terms of instance policy.
1254 @param ipolicy: The ipolicy to verify
1255 @param instance: The instance object to verify
1256 @param node: The new node to relocate
1257 @param ignore: Ignore violations of the ipolicy
1258 @param _compute_fn: The function to verify ipolicy (unittest only)
1259 @see: L{_ComputeIPolicySpecViolation}
1262 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1263 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1266 msg = ("Instance does not meet target node group's (%s) instance"
1267 " policy: %s") % (node.group, utils.CommaJoin(res))
1271 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1274 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1275 """Computes a set of any instances that would violate the new ipolicy.
1277 @param old_ipolicy: The current (still in-place) ipolicy
1278 @param new_ipolicy: The new (to become) ipolicy
1279 @param instances: List of instances to verify
1280 @return: A list of instances which violates the new ipolicy but
1284 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1285 _ComputeViolatingInstances(old_ipolicy, instances))
1288 def _ExpandItemName(fn, name, kind):
1289 """Expand an item name.
1291 @param fn: the function to use for expansion
1292 @param name: requested item name
1293 @param kind: text description ('Node' or 'Instance')
1294 @return: the resolved (full) name
1295 @raise errors.OpPrereqError: if the item is not found
1298 full_name = fn(name)
1299 if full_name is None:
1300 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1305 def _ExpandNodeName(cfg, name):
1306 """Wrapper over L{_ExpandItemName} for nodes."""
1307 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1310 def _ExpandInstanceName(cfg, name):
1311 """Wrapper over L{_ExpandItemName} for instance."""
1312 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1315 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1316 minmem, maxmem, vcpus, nics, disk_template, disks,
1317 bep, hvp, hypervisor_name, tags):
1318 """Builds instance related env variables for hooks
1320 This builds the hook environment from individual variables.
1323 @param name: the name of the instance
1324 @type primary_node: string
1325 @param primary_node: the name of the instance's primary node
1326 @type secondary_nodes: list
1327 @param secondary_nodes: list of secondary nodes as strings
1328 @type os_type: string
1329 @param os_type: the name of the instance's OS
1330 @type status: string
1331 @param status: the desired status of the instance
1332 @type minmem: string
1333 @param minmem: the minimum memory size of the instance
1334 @type maxmem: string
1335 @param maxmem: the maximum memory size of the instance
1337 @param vcpus: the count of VCPUs the instance has
1339 @param nics: list of tuples (ip, mac, mode, link) representing
1340 the NICs the instance has
1341 @type disk_template: string
1342 @param disk_template: the disk template of the instance
1344 @param disks: the list of (size, mode) pairs
1346 @param bep: the backend parameters for the instance
1348 @param hvp: the hypervisor parameters for the instance
1349 @type hypervisor_name: string
1350 @param hypervisor_name: the hypervisor for the instance
1352 @param tags: list of instance tags as strings
1354 @return: the hook environment for this instance
1359 "INSTANCE_NAME": name,
1360 "INSTANCE_PRIMARY": primary_node,
1361 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1362 "INSTANCE_OS_TYPE": os_type,
1363 "INSTANCE_STATUS": status,
1364 "INSTANCE_MINMEM": minmem,
1365 "INSTANCE_MAXMEM": maxmem,
1366 # TODO(2.7) remove deprecated "memory" value
1367 "INSTANCE_MEMORY": maxmem,
1368 "INSTANCE_VCPUS": vcpus,
1369 "INSTANCE_DISK_TEMPLATE": disk_template,
1370 "INSTANCE_HYPERVISOR": hypervisor_name,
1373 nic_count = len(nics)
1374 for idx, (ip, mac, mode, link) in enumerate(nics):
1377 env["INSTANCE_NIC%d_IP" % idx] = ip
1378 env["INSTANCE_NIC%d_MAC" % idx] = mac
1379 env["INSTANCE_NIC%d_MODE" % idx] = mode
1380 env["INSTANCE_NIC%d_LINK" % idx] = link
1381 if mode == constants.NIC_MODE_BRIDGED:
1382 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1386 env["INSTANCE_NIC_COUNT"] = nic_count
1389 disk_count = len(disks)
1390 for idx, (size, mode) in enumerate(disks):
1391 env["INSTANCE_DISK%d_SIZE" % idx] = size
1392 env["INSTANCE_DISK%d_MODE" % idx] = mode
1396 env["INSTANCE_DISK_COUNT"] = disk_count
1401 env["INSTANCE_TAGS"] = " ".join(tags)
1403 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1404 for key, value in source.items():
1405 env["INSTANCE_%s_%s" % (kind, key)] = value
1410 def _NICListToTuple(lu, nics):
1411 """Build a list of nic information tuples.
1413 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1414 value in LUInstanceQueryData.
1416 @type lu: L{LogicalUnit}
1417 @param lu: the logical unit on whose behalf we execute
1418 @type nics: list of L{objects.NIC}
1419 @param nics: list of nics to convert to hooks tuples
1423 cluster = lu.cfg.GetClusterInfo()
1427 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1428 mode = filled_params[constants.NIC_MODE]
1429 link = filled_params[constants.NIC_LINK]
1430 hooks_nics.append((ip, mac, mode, link))
1434 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1435 """Builds instance related env variables for hooks from an object.
1437 @type lu: L{LogicalUnit}
1438 @param lu: the logical unit on whose behalf we execute
1439 @type instance: L{objects.Instance}
1440 @param instance: the instance for which we should build the
1442 @type override: dict
1443 @param override: dictionary with key/values that will override
1446 @return: the hook environment dictionary
1449 cluster = lu.cfg.GetClusterInfo()
1450 bep = cluster.FillBE(instance)
1451 hvp = cluster.FillHV(instance)
1453 "name": instance.name,
1454 "primary_node": instance.primary_node,
1455 "secondary_nodes": instance.secondary_nodes,
1456 "os_type": instance.os,
1457 "status": instance.admin_state,
1458 "maxmem": bep[constants.BE_MAXMEM],
1459 "minmem": bep[constants.BE_MINMEM],
1460 "vcpus": bep[constants.BE_VCPUS],
1461 "nics": _NICListToTuple(lu, instance.nics),
1462 "disk_template": instance.disk_template,
1463 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1466 "hypervisor_name": instance.hypervisor,
1467 "tags": instance.tags,
1470 args.update(override)
1471 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1474 def _AdjustCandidatePool(lu, exceptions):
1475 """Adjust the candidate pool after node operations.
1478 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1480 lu.LogInfo("Promoted nodes to master candidate role: %s",
1481 utils.CommaJoin(node.name for node in mod_list))
1482 for name in mod_list:
1483 lu.context.ReaddNode(name)
1484 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1486 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1490 def _DecideSelfPromotion(lu, exceptions=None):
1491 """Decide whether I should promote myself as a master candidate.
1494 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1495 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1496 # the new node will increase mc_max with one, so:
1497 mc_should = min(mc_should + 1, cp_size)
1498 return mc_now < mc_should
1501 def _CalculateGroupIPolicy(cluster, group):
1502 """Calculate instance policy for group.
1505 return cluster.SimpleFillIPolicy(group.ipolicy)
1508 def _ComputeViolatingInstances(ipolicy, instances):
1509 """Computes a set of instances who violates given ipolicy.
1511 @param ipolicy: The ipolicy to verify
1512 @type instances: object.Instance
1513 @param instances: List of instances to verify
1514 @return: A frozenset of instance names violating the ipolicy
1517 return frozenset([inst.name for inst in instances
1518 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1521 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1522 """Check that the brigdes needed by a list of nics exist.
1525 cluster = lu.cfg.GetClusterInfo()
1526 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1527 brlist = [params[constants.NIC_LINK] for params in paramslist
1528 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1530 result = lu.rpc.call_bridges_exist(target_node, brlist)
1531 result.Raise("Error checking bridges on destination node '%s'" %
1532 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1535 def _CheckInstanceBridgesExist(lu, instance, node=None):
1536 """Check that the brigdes needed by an instance exist.
1540 node = instance.primary_node
1541 _CheckNicsBridgesExist(lu, instance.nics, node)
1544 def _CheckOSVariant(os_obj, name):
1545 """Check whether an OS name conforms to the os variants specification.
1547 @type os_obj: L{objects.OS}
1548 @param os_obj: OS object to check
1550 @param name: OS name passed by the user, to check for validity
1553 variant = objects.OS.GetVariant(name)
1554 if not os_obj.supported_variants:
1556 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1557 " passed)" % (os_obj.name, variant),
1561 raise errors.OpPrereqError("OS name must include a variant",
1564 if variant not in os_obj.supported_variants:
1565 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1568 def _GetNodeInstancesInner(cfg, fn):
1569 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1572 def _GetNodeInstances(cfg, node_name):
1573 """Returns a list of all primary and secondary instances on a node.
1577 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1580 def _GetNodePrimaryInstances(cfg, node_name):
1581 """Returns primary instances on a node.
1584 return _GetNodeInstancesInner(cfg,
1585 lambda inst: node_name == inst.primary_node)
1588 def _GetNodeSecondaryInstances(cfg, node_name):
1589 """Returns secondary instances on a node.
1592 return _GetNodeInstancesInner(cfg,
1593 lambda inst: node_name in inst.secondary_nodes)
1596 def _GetStorageTypeArgs(cfg, storage_type):
1597 """Returns the arguments for a storage type.
1600 # Special case for file storage
1601 if storage_type == constants.ST_FILE:
1602 # storage.FileStorage wants a list of storage directories
1603 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1608 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1611 for dev in instance.disks:
1612 cfg.SetDiskID(dev, node_name)
1614 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1616 result.Raise("Failed to get disk status from node %s" % node_name,
1617 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1619 for idx, bdev_status in enumerate(result.payload):
1620 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1626 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1627 """Check the sanity of iallocator and node arguments and use the
1628 cluster-wide iallocator if appropriate.
1630 Check that at most one of (iallocator, node) is specified. If none is
1631 specified, then the LU's opcode's iallocator slot is filled with the
1632 cluster-wide default iallocator.
1634 @type iallocator_slot: string
1635 @param iallocator_slot: the name of the opcode iallocator slot
1636 @type node_slot: string
1637 @param node_slot: the name of the opcode target node slot
1640 node = getattr(lu.op, node_slot, None)
1641 iallocator = getattr(lu.op, iallocator_slot, None)
1643 if node is not None and iallocator is not None:
1644 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1646 elif node is None and iallocator is None:
1647 default_iallocator = lu.cfg.GetDefaultIAllocator()
1648 if default_iallocator:
1649 setattr(lu.op, iallocator_slot, default_iallocator)
1651 raise errors.OpPrereqError("No iallocator or node given and no"
1652 " cluster-wide default iallocator found;"
1653 " please specify either an iallocator or a"
1654 " node, or set a cluster-wide default"
1658 def _GetDefaultIAllocator(cfg, iallocator):
1659 """Decides on which iallocator to use.
1661 @type cfg: L{config.ConfigWriter}
1662 @param cfg: Cluster configuration object
1663 @type iallocator: string or None
1664 @param iallocator: Iallocator specified in opcode
1666 @return: Iallocator name
1670 # Use default iallocator
1671 iallocator = cfg.GetDefaultIAllocator()
1674 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1675 " opcode nor as a cluster-wide default",
1681 class LUClusterPostInit(LogicalUnit):
1682 """Logical unit for running hooks after cluster initialization.
1685 HPATH = "cluster-init"
1686 HTYPE = constants.HTYPE_CLUSTER
1688 def BuildHooksEnv(self):
1693 "OP_TARGET": self.cfg.GetClusterName(),
1696 def BuildHooksNodes(self):
1697 """Build hooks nodes.
1700 return ([], [self.cfg.GetMasterNode()])
1702 def Exec(self, feedback_fn):
1709 class LUClusterDestroy(LogicalUnit):
1710 """Logical unit for destroying the cluster.
1713 HPATH = "cluster-destroy"
1714 HTYPE = constants.HTYPE_CLUSTER
1716 def BuildHooksEnv(self):
1721 "OP_TARGET": self.cfg.GetClusterName(),
1724 def BuildHooksNodes(self):
1725 """Build hooks nodes.
1730 def CheckPrereq(self):
1731 """Check prerequisites.
1733 This checks whether the cluster is empty.
1735 Any errors are signaled by raising errors.OpPrereqError.
1738 master = self.cfg.GetMasterNode()
1740 nodelist = self.cfg.GetNodeList()
1741 if len(nodelist) != 1 or nodelist[0] != master:
1742 raise errors.OpPrereqError("There are still %d node(s) in"
1743 " this cluster." % (len(nodelist) - 1),
1745 instancelist = self.cfg.GetInstanceList()
1747 raise errors.OpPrereqError("There are still %d instance(s) in"
1748 " this cluster." % len(instancelist),
1751 def Exec(self, feedback_fn):
1752 """Destroys the cluster.
1755 master_params = self.cfg.GetMasterNetworkParameters()
1757 # Run post hooks on master node before it's removed
1758 _RunPostHook(self, master_params.name)
1760 ems = self.cfg.GetUseExternalMipScript()
1761 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1764 self.LogWarning("Error disabling the master IP address: %s",
1767 return master_params.name
1770 def _VerifyCertificate(filename):
1771 """Verifies a certificate for L{LUClusterVerifyConfig}.
1773 @type filename: string
1774 @param filename: Path to PEM file
1778 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1779 utils.ReadFile(filename))
1780 except Exception, err: # pylint: disable=W0703
1781 return (LUClusterVerifyConfig.ETYPE_ERROR,
1782 "Failed to load X509 certificate %s: %s" % (filename, err))
1785 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1786 constants.SSL_CERT_EXPIRATION_ERROR)
1789 fnamemsg = "While verifying %s: %s" % (filename, msg)
1794 return (None, fnamemsg)
1795 elif errcode == utils.CERT_WARNING:
1796 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1797 elif errcode == utils.CERT_ERROR:
1798 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1800 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1803 def _GetAllHypervisorParameters(cluster, instances):
1804 """Compute the set of all hypervisor parameters.
1806 @type cluster: L{objects.Cluster}
1807 @param cluster: the cluster object
1808 @param instances: list of L{objects.Instance}
1809 @param instances: additional instances from which to obtain parameters
1810 @rtype: list of (origin, hypervisor, parameters)
1811 @return: a list with all parameters found, indicating the hypervisor they
1812 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1817 for hv_name in cluster.enabled_hypervisors:
1818 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1820 for os_name, os_hvp in cluster.os_hvp.items():
1821 for hv_name, hv_params in os_hvp.items():
1823 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1824 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1826 # TODO: collapse identical parameter values in a single one
1827 for instance in instances:
1828 if instance.hvparams:
1829 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1830 cluster.FillHV(instance)))
1835 class _VerifyErrors(object):
1836 """Mix-in for cluster/group verify LUs.
1838 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1839 self.op and self._feedback_fn to be available.)
1843 ETYPE_FIELD = "code"
1844 ETYPE_ERROR = "ERROR"
1845 ETYPE_WARNING = "WARNING"
1847 def _Error(self, ecode, item, msg, *args, **kwargs):
1848 """Format an error message.
1850 Based on the opcode's error_codes parameter, either format a
1851 parseable error code, or a simpler error string.
1853 This must be called only from Exec and functions called from Exec.
1856 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1857 itype, etxt, _ = ecode
1858 # first complete the msg
1861 # then format the whole message
1862 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1863 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1869 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1870 # and finally report it via the feedback_fn
1871 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1873 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1874 """Log an error message if the passed condition is True.
1878 or self.op.debug_simulate_errors) # pylint: disable=E1101
1880 # If the error code is in the list of ignored errors, demote the error to a
1882 (_, etxt, _) = ecode
1883 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1884 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1887 self._Error(ecode, *args, **kwargs)
1889 # do not mark the operation as failed for WARN cases only
1890 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1891 self.bad = self.bad or cond
1894 class LUClusterVerify(NoHooksLU):
1895 """Submits all jobs necessary to verify the cluster.
1900 def ExpandNames(self):
1901 self.needed_locks = {}
1903 def Exec(self, feedback_fn):
1906 if self.op.group_name:
1907 groups = [self.op.group_name]
1908 depends_fn = lambda: None
1910 groups = self.cfg.GetNodeGroupList()
1912 # Verify global configuration
1914 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1917 # Always depend on global verification
1918 depends_fn = lambda: [(-len(jobs), [])]
1920 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1921 ignore_errors=self.op.ignore_errors,
1922 depends=depends_fn())]
1923 for group in groups)
1925 # Fix up all parameters
1926 for op in itertools.chain(*jobs): # pylint: disable=W0142
1927 op.debug_simulate_errors = self.op.debug_simulate_errors
1928 op.verbose = self.op.verbose
1929 op.error_codes = self.op.error_codes
1931 op.skip_checks = self.op.skip_checks
1932 except AttributeError:
1933 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1935 return ResultWithJobs(jobs)
1938 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1939 """Verifies the cluster config.
1944 def _VerifyHVP(self, hvp_data):
1945 """Verifies locally the syntax of the hypervisor parameters.
1948 for item, hv_name, hv_params in hvp_data:
1949 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1952 hv_class = hypervisor.GetHypervisor(hv_name)
1953 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1954 hv_class.CheckParameterSyntax(hv_params)
1955 except errors.GenericError, err:
1956 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1958 def ExpandNames(self):
1959 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1960 self.share_locks = _ShareAll()
1962 def CheckPrereq(self):
1963 """Check prerequisites.
1966 # Retrieve all information
1967 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1968 self.all_node_info = self.cfg.GetAllNodesInfo()
1969 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1971 def Exec(self, feedback_fn):
1972 """Verify integrity of cluster, performing various test on nodes.
1976 self._feedback_fn = feedback_fn
1978 feedback_fn("* Verifying cluster config")
1980 for msg in self.cfg.VerifyConfig():
1981 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1983 feedback_fn("* Verifying cluster certificate files")
1985 for cert_filename in constants.ALL_CERT_FILES:
1986 (errcode, msg) = _VerifyCertificate(cert_filename)
1987 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1989 feedback_fn("* Verifying hypervisor parameters")
1991 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1992 self.all_inst_info.values()))
1994 feedback_fn("* Verifying all nodes belong to an existing group")
1996 # We do this verification here because, should this bogus circumstance
1997 # occur, it would never be caught by VerifyGroup, which only acts on
1998 # nodes/instances reachable from existing node groups.
2000 dangling_nodes = set(node.name for node in self.all_node_info.values()
2001 if node.group not in self.all_group_info)
2003 dangling_instances = {}
2004 no_node_instances = []
2006 for inst in self.all_inst_info.values():
2007 if inst.primary_node in dangling_nodes:
2008 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2009 elif inst.primary_node not in self.all_node_info:
2010 no_node_instances.append(inst.name)
2015 utils.CommaJoin(dangling_instances.get(node.name,
2017 for node in dangling_nodes]
2019 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2021 "the following nodes (and their instances) belong to a non"
2022 " existing group: %s", utils.CommaJoin(pretty_dangling))
2024 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2026 "the following instances have a non-existing primary-node:"
2027 " %s", utils.CommaJoin(no_node_instances))
2032 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2033 """Verifies the status of a node group.
2036 HPATH = "cluster-verify"
2037 HTYPE = constants.HTYPE_CLUSTER
2040 _HOOKS_INDENT_RE = re.compile("^", re.M)
2042 class NodeImage(object):
2043 """A class representing the logical and physical status of a node.
2046 @ivar name: the node name to which this object refers
2047 @ivar volumes: a structure as returned from
2048 L{ganeti.backend.GetVolumeList} (runtime)
2049 @ivar instances: a list of running instances (runtime)
2050 @ivar pinst: list of configured primary instances (config)
2051 @ivar sinst: list of configured secondary instances (config)
2052 @ivar sbp: dictionary of {primary-node: list of instances} for all
2053 instances for which this node is secondary (config)
2054 @ivar mfree: free memory, as reported by hypervisor (runtime)
2055 @ivar dfree: free disk, as reported by the node (runtime)
2056 @ivar offline: the offline status (config)
2057 @type rpc_fail: boolean
2058 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2059 not whether the individual keys were correct) (runtime)
2060 @type lvm_fail: boolean
2061 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2062 @type hyp_fail: boolean
2063 @ivar hyp_fail: whether the RPC call didn't return the instance list
2064 @type ghost: boolean
2065 @ivar ghost: whether this is a known node or not (config)
2066 @type os_fail: boolean
2067 @ivar os_fail: whether the RPC call didn't return valid OS data
2069 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2070 @type vm_capable: boolean
2071 @ivar vm_capable: whether the node can host instances
2074 def __init__(self, offline=False, name=None, vm_capable=True):
2083 self.offline = offline
2084 self.vm_capable = vm_capable
2085 self.rpc_fail = False
2086 self.lvm_fail = False
2087 self.hyp_fail = False
2089 self.os_fail = False
2092 def ExpandNames(self):
2093 # This raises errors.OpPrereqError on its own:
2094 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2096 # Get instances in node group; this is unsafe and needs verification later
2098 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2100 self.needed_locks = {
2101 locking.LEVEL_INSTANCE: inst_names,
2102 locking.LEVEL_NODEGROUP: [self.group_uuid],
2103 locking.LEVEL_NODE: [],
2106 self.share_locks = _ShareAll()
2108 def DeclareLocks(self, level):
2109 if level == locking.LEVEL_NODE:
2110 # Get members of node group; this is unsafe and needs verification later
2111 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2113 all_inst_info = self.cfg.GetAllInstancesInfo()
2115 # In Exec(), we warn about mirrored instances that have primary and
2116 # secondary living in separate node groups. To fully verify that
2117 # volumes for these instances are healthy, we will need to do an
2118 # extra call to their secondaries. We ensure here those nodes will
2120 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2121 # Important: access only the instances whose lock is owned
2122 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2123 nodes.update(all_inst_info[inst].secondary_nodes)
2125 self.needed_locks[locking.LEVEL_NODE] = nodes
2127 def CheckPrereq(self):
2128 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2129 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2131 group_nodes = set(self.group_info.members)
2133 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2136 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2138 unlocked_instances = \
2139 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2142 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2143 utils.CommaJoin(unlocked_nodes),
2146 if unlocked_instances:
2147 raise errors.OpPrereqError("Missing lock for instances: %s" %
2148 utils.CommaJoin(unlocked_instances),
2151 self.all_node_info = self.cfg.GetAllNodesInfo()
2152 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2154 self.my_node_names = utils.NiceSort(group_nodes)
2155 self.my_inst_names = utils.NiceSort(group_instances)
2157 self.my_node_info = dict((name, self.all_node_info[name])
2158 for name in self.my_node_names)
2160 self.my_inst_info = dict((name, self.all_inst_info[name])
2161 for name in self.my_inst_names)
2163 # We detect here the nodes that will need the extra RPC calls for verifying
2164 # split LV volumes; they should be locked.
2165 extra_lv_nodes = set()
2167 for inst in self.my_inst_info.values():
2168 if inst.disk_template in constants.DTS_INT_MIRROR:
2169 for nname in inst.all_nodes:
2170 if self.all_node_info[nname].group != self.group_uuid:
2171 extra_lv_nodes.add(nname)
2173 unlocked_lv_nodes = \
2174 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2176 if unlocked_lv_nodes:
2177 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2178 utils.CommaJoin(unlocked_lv_nodes),
2180 self.extra_lv_nodes = list(extra_lv_nodes)
2182 def _VerifyNode(self, ninfo, nresult):
2183 """Perform some basic validation on data returned from a node.
2185 - check the result data structure is well formed and has all the
2187 - check ganeti version
2189 @type ninfo: L{objects.Node}
2190 @param ninfo: the node to check
2191 @param nresult: the results from the node
2193 @return: whether overall this call was successful (and we can expect
2194 reasonable values in the respose)
2198 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2200 # main result, nresult should be a non-empty dict
2201 test = not nresult or not isinstance(nresult, dict)
2202 _ErrorIf(test, constants.CV_ENODERPC, node,
2203 "unable to verify node: no data returned")
2207 # compares ganeti version
2208 local_version = constants.PROTOCOL_VERSION
2209 remote_version = nresult.get("version", None)
2210 test = not (remote_version and
2211 isinstance(remote_version, (list, tuple)) and
2212 len(remote_version) == 2)
2213 _ErrorIf(test, constants.CV_ENODERPC, node,
2214 "connection to node returned invalid data")
2218 test = local_version != remote_version[0]
2219 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2220 "incompatible protocol versions: master %s,"
2221 " node %s", local_version, remote_version[0])
2225 # node seems compatible, we can actually try to look into its results
2227 # full package version
2228 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2229 constants.CV_ENODEVERSION, node,
2230 "software version mismatch: master %s, node %s",
2231 constants.RELEASE_VERSION, remote_version[1],
2232 code=self.ETYPE_WARNING)
2234 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2235 if ninfo.vm_capable and isinstance(hyp_result, dict):
2236 for hv_name, hv_result in hyp_result.iteritems():
2237 test = hv_result is not None
2238 _ErrorIf(test, constants.CV_ENODEHV, node,
2239 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2241 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2242 if ninfo.vm_capable and isinstance(hvp_result, list):
2243 for item, hv_name, hv_result in hvp_result:
2244 _ErrorIf(True, constants.CV_ENODEHV, node,
2245 "hypervisor %s parameter verify failure (source %s): %s",
2246 hv_name, item, hv_result)
2248 test = nresult.get(constants.NV_NODESETUP,
2249 ["Missing NODESETUP results"])
2250 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2255 def _VerifyNodeTime(self, ninfo, nresult,
2256 nvinfo_starttime, nvinfo_endtime):
2257 """Check the node time.
2259 @type ninfo: L{objects.Node}
2260 @param ninfo: the node to check
2261 @param nresult: the remote results for the node
2262 @param nvinfo_starttime: the start time of the RPC call
2263 @param nvinfo_endtime: the end time of the RPC call
2267 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2269 ntime = nresult.get(constants.NV_TIME, None)
2271 ntime_merged = utils.MergeTime(ntime)
2272 except (ValueError, TypeError):
2273 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2276 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2277 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2278 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2279 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2283 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2284 "Node time diverges by at least %s from master node time",
2287 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2288 """Check the node LVM results.
2290 @type ninfo: L{objects.Node}
2291 @param ninfo: the node to check
2292 @param nresult: the remote results for the node
2293 @param vg_name: the configured VG name
2300 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2302 # checks vg existence and size > 20G
2303 vglist = nresult.get(constants.NV_VGLIST, None)
2305 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2307 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2308 constants.MIN_VG_SIZE)
2309 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2312 pvlist = nresult.get(constants.NV_PVLIST, None)
2313 test = pvlist is None
2314 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2316 # check that ':' is not present in PV names, since it's a
2317 # special character for lvcreate (denotes the range of PEs to
2319 for _, pvname, owner_vg in pvlist:
2320 test = ":" in pvname
2321 _ErrorIf(test, constants.CV_ENODELVM, node,
2322 "Invalid character ':' in PV '%s' of VG '%s'",
2325 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2326 """Check the node bridges.
2328 @type ninfo: L{objects.Node}
2329 @param ninfo: the node to check
2330 @param nresult: the remote results for the node
2331 @param bridges: the expected list of bridges
2338 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2340 missing = nresult.get(constants.NV_BRIDGES, None)
2341 test = not isinstance(missing, list)
2342 _ErrorIf(test, constants.CV_ENODENET, node,
2343 "did not return valid bridge information")
2345 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2346 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2348 def _VerifyNodeUserScripts(self, ninfo, nresult):
2349 """Check the results of user scripts presence and executability on the node
2351 @type ninfo: L{objects.Node}
2352 @param ninfo: the node to check
2353 @param nresult: the remote results for the node
2358 test = not constants.NV_USERSCRIPTS in nresult
2359 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2360 "did not return user scripts information")
2362 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2364 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2365 "user scripts not present or not executable: %s" %
2366 utils.CommaJoin(sorted(broken_scripts)))
2368 def _VerifyNodeNetwork(self, ninfo, nresult):
2369 """Check the node network connectivity results.
2371 @type ninfo: L{objects.Node}
2372 @param ninfo: the node to check
2373 @param nresult: the remote results for the node
2377 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2379 test = constants.NV_NODELIST not in nresult
2380 _ErrorIf(test, constants.CV_ENODESSH, node,
2381 "node hasn't returned node ssh connectivity data")
2383 if nresult[constants.NV_NODELIST]:
2384 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2385 _ErrorIf(True, constants.CV_ENODESSH, node,
2386 "ssh communication with node '%s': %s", a_node, a_msg)
2388 test = constants.NV_NODENETTEST not in nresult
2389 _ErrorIf(test, constants.CV_ENODENET, node,
2390 "node hasn't returned node tcp connectivity data")
2392 if nresult[constants.NV_NODENETTEST]:
2393 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2395 _ErrorIf(True, constants.CV_ENODENET, node,
2396 "tcp communication with node '%s': %s",
2397 anode, nresult[constants.NV_NODENETTEST][anode])
2399 test = constants.NV_MASTERIP not in nresult
2400 _ErrorIf(test, constants.CV_ENODENET, node,
2401 "node hasn't returned node master IP reachability data")
2403 if not nresult[constants.NV_MASTERIP]:
2404 if node == self.master_node:
2405 msg = "the master node cannot reach the master IP (not configured?)"
2407 msg = "cannot reach the master IP"
2408 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2410 def _VerifyInstance(self, instance, instanceconfig, node_image,
2412 """Verify an instance.
2414 This function checks to see if the required block devices are
2415 available on the instance's node.
2418 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2419 node_current = instanceconfig.primary_node
2421 node_vol_should = {}
2422 instanceconfig.MapLVsByNode(node_vol_should)
2424 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2425 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2426 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2428 for node in node_vol_should:
2429 n_img = node_image[node]
2430 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2431 # ignore missing volumes on offline or broken nodes
2433 for volume in node_vol_should[node]:
2434 test = volume not in n_img.volumes
2435 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2436 "volume %s missing on node %s", volume, node)
2438 if instanceconfig.admin_state == constants.ADMINST_UP:
2439 pri_img = node_image[node_current]
2440 test = instance not in pri_img.instances and not pri_img.offline
2441 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2442 "instance not running on its primary node %s",
2445 diskdata = [(nname, success, status, idx)
2446 for (nname, disks) in diskstatus.items()
2447 for idx, (success, status) in enumerate(disks)]
2449 for nname, success, bdev_status, idx in diskdata:
2450 # the 'ghost node' construction in Exec() ensures that we have a
2452 snode = node_image[nname]
2453 bad_snode = snode.ghost or snode.offline
2454 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2455 not success and not bad_snode,
2456 constants.CV_EINSTANCEFAULTYDISK, instance,
2457 "couldn't retrieve status for disk/%s on %s: %s",
2458 idx, nname, bdev_status)
2459 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2460 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2461 constants.CV_EINSTANCEFAULTYDISK, instance,
2462 "disk/%s on %s is faulty", idx, nname)
2464 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2465 """Verify if there are any unknown volumes in the cluster.
2467 The .os, .swap and backup volumes are ignored. All other volumes are
2468 reported as unknown.
2470 @type reserved: L{ganeti.utils.FieldSet}
2471 @param reserved: a FieldSet of reserved volume names
2474 for node, n_img in node_image.items():
2475 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2476 self.all_node_info[node].group != self.group_uuid):
2477 # skip non-healthy nodes
2479 for volume in n_img.volumes:
2480 test = ((node not in node_vol_should or
2481 volume not in node_vol_should[node]) and
2482 not reserved.Matches(volume))
2483 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2484 "volume %s is unknown", volume)
2486 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2487 """Verify N+1 Memory Resilience.
2489 Check that if one single node dies we can still start all the
2490 instances it was primary for.
2493 cluster_info = self.cfg.GetClusterInfo()
2494 for node, n_img in node_image.items():
2495 # This code checks that every node which is now listed as
2496 # secondary has enough memory to host all instances it is
2497 # supposed to should a single other node in the cluster fail.
2498 # FIXME: not ready for failover to an arbitrary node
2499 # FIXME: does not support file-backed instances
2500 # WARNING: we currently take into account down instances as well
2501 # as up ones, considering that even if they're down someone
2502 # might want to start them even in the event of a node failure.
2503 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2504 # we're skipping nodes marked offline and nodes in other groups from
2505 # the N+1 warning, since most likely we don't have good memory
2506 # infromation from them; we already list instances living on such
2507 # nodes, and that's enough warning
2509 #TODO(dynmem): also consider ballooning out other instances
2510 for prinode, instances in n_img.sbp.items():
2512 for instance in instances:
2513 bep = cluster_info.FillBE(instance_cfg[instance])
2514 if bep[constants.BE_AUTO_BALANCE]:
2515 needed_mem += bep[constants.BE_MINMEM]
2516 test = n_img.mfree < needed_mem
2517 self._ErrorIf(test, constants.CV_ENODEN1, node,
2518 "not enough memory to accomodate instance failovers"
2519 " should node %s fail (%dMiB needed, %dMiB available)",
2520 prinode, needed_mem, n_img.mfree)
2523 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2524 (files_all, files_opt, files_mc, files_vm)):
2525 """Verifies file checksums collected from all nodes.
2527 @param errorif: Callback for reporting errors
2528 @param nodeinfo: List of L{objects.Node} objects
2529 @param master_node: Name of master node
2530 @param all_nvinfo: RPC results
2533 # Define functions determining which nodes to consider for a file
2536 (files_mc, lambda node: (node.master_candidate or
2537 node.name == master_node)),
2538 (files_vm, lambda node: node.vm_capable),
2541 # Build mapping from filename to list of nodes which should have the file
2543 for (files, fn) in files2nodefn:
2545 filenodes = nodeinfo
2547 filenodes = filter(fn, nodeinfo)
2548 nodefiles.update((filename,
2549 frozenset(map(operator.attrgetter("name"), filenodes)))
2550 for filename in files)
2552 assert set(nodefiles) == (files_all | files_mc | files_vm)
2554 fileinfo = dict((filename, {}) for filename in nodefiles)
2555 ignore_nodes = set()
2557 for node in nodeinfo:
2559 ignore_nodes.add(node.name)
2562 nresult = all_nvinfo[node.name]
2564 if nresult.fail_msg or not nresult.payload:
2567 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2569 test = not (node_files and isinstance(node_files, dict))
2570 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2571 "Node did not return file checksum data")
2573 ignore_nodes.add(node.name)
2576 # Build per-checksum mapping from filename to nodes having it
2577 for (filename, checksum) in node_files.items():
2578 assert filename in nodefiles
2579 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2581 for (filename, checksums) in fileinfo.items():
2582 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2584 # Nodes having the file
2585 with_file = frozenset(node_name
2586 for nodes in fileinfo[filename].values()
2587 for node_name in nodes) - ignore_nodes
2589 expected_nodes = nodefiles[filename] - ignore_nodes
2591 # Nodes missing file
2592 missing_file = expected_nodes - with_file
2594 if filename in files_opt:
2596 errorif(missing_file and missing_file != expected_nodes,
2597 constants.CV_ECLUSTERFILECHECK, None,
2598 "File %s is optional, but it must exist on all or no"
2599 " nodes (not found on %s)",
2600 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2602 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2603 "File %s is missing from node(s) %s", filename,
2604 utils.CommaJoin(utils.NiceSort(missing_file)))
2606 # Warn if a node has a file it shouldn't
2607 unexpected = with_file - expected_nodes
2609 constants.CV_ECLUSTERFILECHECK, None,
2610 "File %s should not exist on node(s) %s",
2611 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2613 # See if there are multiple versions of the file
2614 test = len(checksums) > 1
2616 variants = ["variant %s on %s" %
2617 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2618 for (idx, (checksum, nodes)) in
2619 enumerate(sorted(checksums.items()))]
2623 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2624 "File %s found with %s different checksums (%s)",
2625 filename, len(checksums), "; ".join(variants))
2627 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2629 """Verifies and the node DRBD status.
2631 @type ninfo: L{objects.Node}
2632 @param ninfo: the node to check
2633 @param nresult: the remote results for the node
2634 @param instanceinfo: the dict of instances
2635 @param drbd_helper: the configured DRBD usermode helper
2636 @param drbd_map: the DRBD map as returned by
2637 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2641 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2644 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2645 test = (helper_result == None)
2646 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2647 "no drbd usermode helper returned")
2649 status, payload = helper_result
2651 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2652 "drbd usermode helper check unsuccessful: %s", payload)
2653 test = status and (payload != drbd_helper)
2654 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2655 "wrong drbd usermode helper: %s", payload)
2657 # compute the DRBD minors
2659 for minor, instance in drbd_map[node].items():
2660 test = instance not in instanceinfo
2661 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2662 "ghost instance '%s' in temporary DRBD map", instance)
2663 # ghost instance should not be running, but otherwise we
2664 # don't give double warnings (both ghost instance and
2665 # unallocated minor in use)
2667 node_drbd[minor] = (instance, False)
2669 instance = instanceinfo[instance]
2670 node_drbd[minor] = (instance.name,
2671 instance.admin_state == constants.ADMINST_UP)
2673 # and now check them
2674 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2675 test = not isinstance(used_minors, (tuple, list))
2676 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2677 "cannot parse drbd status file: %s", str(used_minors))
2679 # we cannot check drbd status
2682 for minor, (iname, must_exist) in node_drbd.items():
2683 test = minor not in used_minors and must_exist
2684 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2685 "drbd minor %d of instance %s is not active", minor, iname)
2686 for minor in used_minors:
2687 test = minor not in node_drbd
2688 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2689 "unallocated drbd minor %d is in use", minor)
2691 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2692 """Builds the node OS structures.
2694 @type ninfo: L{objects.Node}
2695 @param ninfo: the node to check
2696 @param nresult: the remote results for the node
2697 @param nimg: the node image object
2701 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2703 remote_os = nresult.get(constants.NV_OSLIST, None)
2704 test = (not isinstance(remote_os, list) or
2705 not compat.all(isinstance(v, list) and len(v) == 7
2706 for v in remote_os))
2708 _ErrorIf(test, constants.CV_ENODEOS, node,
2709 "node hasn't returned valid OS data")
2718 for (name, os_path, status, diagnose,
2719 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2721 if name not in os_dict:
2724 # parameters is a list of lists instead of list of tuples due to
2725 # JSON lacking a real tuple type, fix it:
2726 parameters = [tuple(v) for v in parameters]
2727 os_dict[name].append((os_path, status, diagnose,
2728 set(variants), set(parameters), set(api_ver)))
2730 nimg.oslist = os_dict
2732 def _VerifyNodeOS(self, ninfo, nimg, base):
2733 """Verifies the node OS list.
2735 @type ninfo: L{objects.Node}
2736 @param ninfo: the node to check
2737 @param nimg: the node image object
2738 @param base: the 'template' node we match against (e.g. from the master)
2742 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2744 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2746 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2747 for os_name, os_data in nimg.oslist.items():
2748 assert os_data, "Empty OS status for OS %s?!" % os_name
2749 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2750 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2751 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2752 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2753 "OS '%s' has multiple entries (first one shadows the rest): %s",
2754 os_name, utils.CommaJoin([v[0] for v in os_data]))
2755 # comparisons with the 'base' image
2756 test = os_name not in base.oslist
2757 _ErrorIf(test, constants.CV_ENODEOS, node,
2758 "Extra OS %s not present on reference node (%s)",
2762 assert base.oslist[os_name], "Base node has empty OS status?"
2763 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2765 # base OS is invalid, skipping
2767 for kind, a, b in [("API version", f_api, b_api),
2768 ("variants list", f_var, b_var),
2769 ("parameters", beautify_params(f_param),
2770 beautify_params(b_param))]:
2771 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2772 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2773 kind, os_name, base.name,
2774 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2776 # check any missing OSes
2777 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2778 _ErrorIf(missing, constants.CV_ENODEOS, node,
2779 "OSes present on reference node %s but missing on this node: %s",
2780 base.name, utils.CommaJoin(missing))
2782 def _VerifyOob(self, ninfo, nresult):
2783 """Verifies out of band functionality of a node.
2785 @type ninfo: L{objects.Node}
2786 @param ninfo: the node to check
2787 @param nresult: the remote results for the node
2791 # We just have to verify the paths on master and/or master candidates
2792 # as the oob helper is invoked on the master
2793 if ((ninfo.master_candidate or ninfo.master_capable) and
2794 constants.NV_OOB_PATHS in nresult):
2795 for path_result in nresult[constants.NV_OOB_PATHS]:
2796 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2798 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2799 """Verifies and updates the node volume data.
2801 This function will update a L{NodeImage}'s internal structures
2802 with data from the remote call.
2804 @type ninfo: L{objects.Node}
2805 @param ninfo: the node to check
2806 @param nresult: the remote results for the node
2807 @param nimg: the node image object
2808 @param vg_name: the configured VG name
2812 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2814 nimg.lvm_fail = True
2815 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2818 elif isinstance(lvdata, basestring):
2819 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2820 utils.SafeEncode(lvdata))
2821 elif not isinstance(lvdata, dict):
2822 _ErrorIf(True, constants.CV_ENODELVM, node,
2823 "rpc call to node failed (lvlist)")
2825 nimg.volumes = lvdata
2826 nimg.lvm_fail = False
2828 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2829 """Verifies and updates the node instance list.
2831 If the listing was successful, then updates this node's instance
2832 list. Otherwise, it marks the RPC call as failed for the instance
2835 @type ninfo: L{objects.Node}
2836 @param ninfo: the node to check
2837 @param nresult: the remote results for the node
2838 @param nimg: the node image object
2841 idata = nresult.get(constants.NV_INSTANCELIST, None)
2842 test = not isinstance(idata, list)
2843 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2844 "rpc call to node failed (instancelist): %s",
2845 utils.SafeEncode(str(idata)))
2847 nimg.hyp_fail = True
2849 nimg.instances = idata
2851 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2852 """Verifies and computes a node information map
2854 @type ninfo: L{objects.Node}
2855 @param ninfo: the node to check
2856 @param nresult: the remote results for the node
2857 @param nimg: the node image object
2858 @param vg_name: the configured VG name
2862 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2864 # try to read free memory (from the hypervisor)
2865 hv_info = nresult.get(constants.NV_HVINFO, None)
2866 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2867 _ErrorIf(test, constants.CV_ENODEHV, node,
2868 "rpc call to node failed (hvinfo)")
2871 nimg.mfree = int(hv_info["memory_free"])
2872 except (ValueError, TypeError):
2873 _ErrorIf(True, constants.CV_ENODERPC, node,
2874 "node returned invalid nodeinfo, check hypervisor")
2876 # FIXME: devise a free space model for file based instances as well
2877 if vg_name is not None:
2878 test = (constants.NV_VGLIST not in nresult or
2879 vg_name not in nresult[constants.NV_VGLIST])
2880 _ErrorIf(test, constants.CV_ENODELVM, node,
2881 "node didn't return data for the volume group '%s'"
2882 " - it is either missing or broken", vg_name)
2885 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2886 except (ValueError, TypeError):
2887 _ErrorIf(True, constants.CV_ENODERPC, node,
2888 "node returned invalid LVM info, check LVM status")
2890 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2891 """Gets per-disk status information for all instances.
2893 @type nodelist: list of strings
2894 @param nodelist: Node names
2895 @type node_image: dict of (name, L{objects.Node})
2896 @param node_image: Node objects
2897 @type instanceinfo: dict of (name, L{objects.Instance})
2898 @param instanceinfo: Instance objects
2899 @rtype: {instance: {node: [(succes, payload)]}}
2900 @return: a dictionary of per-instance dictionaries with nodes as
2901 keys and disk information as values; the disk information is a
2902 list of tuples (success, payload)
2905 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2908 node_disks_devonly = {}
2909 diskless_instances = set()
2910 diskless = constants.DT_DISKLESS
2912 for nname in nodelist:
2913 node_instances = list(itertools.chain(node_image[nname].pinst,
2914 node_image[nname].sinst))
2915 diskless_instances.update(inst for inst in node_instances
2916 if instanceinfo[inst].disk_template == diskless)
2917 disks = [(inst, disk)
2918 for inst in node_instances
2919 for disk in instanceinfo[inst].disks]
2922 # No need to collect data
2925 node_disks[nname] = disks
2927 # _AnnotateDiskParams makes already copies of the disks
2929 for (inst, dev) in disks:
2930 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2931 self.cfg.SetDiskID(anno_disk, nname)
2932 devonly.append(anno_disk)
2934 node_disks_devonly[nname] = devonly
2936 assert len(node_disks) == len(node_disks_devonly)
2938 # Collect data from all nodes with disks
2939 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2942 assert len(result) == len(node_disks)
2946 for (nname, nres) in result.items():
2947 disks = node_disks[nname]
2950 # No data from this node
2951 data = len(disks) * [(False, "node offline")]
2954 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2955 "while getting disk information: %s", msg)
2957 # No data from this node
2958 data = len(disks) * [(False, msg)]
2961 for idx, i in enumerate(nres.payload):
2962 if isinstance(i, (tuple, list)) and len(i) == 2:
2965 logging.warning("Invalid result from node %s, entry %d: %s",
2967 data.append((False, "Invalid result from the remote node"))
2969 for ((inst, _), status) in zip(disks, data):
2970 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2972 # Add empty entries for diskless instances.
2973 for inst in diskless_instances:
2974 assert inst not in instdisk
2977 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2978 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2979 compat.all(isinstance(s, (tuple, list)) and
2980 len(s) == 2 for s in statuses)
2981 for inst, nnames in instdisk.items()
2982 for nname, statuses in nnames.items())
2983 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2988 def _SshNodeSelector(group_uuid, all_nodes):
2989 """Create endless iterators for all potential SSH check hosts.
2992 nodes = [node for node in all_nodes
2993 if (node.group != group_uuid and
2995 keyfunc = operator.attrgetter("group")
2997 return map(itertools.cycle,
2998 [sorted(map(operator.attrgetter("name"), names))
2999 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3003 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3004 """Choose which nodes should talk to which other nodes.
3006 We will make nodes contact all nodes in their group, and one node from
3009 @warning: This algorithm has a known issue if one node group is much
3010 smaller than others (e.g. just one node). In such a case all other
3011 nodes will talk to the single node.
3014 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3015 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3017 return (online_nodes,
3018 dict((name, sorted([i.next() for i in sel]))
3019 for name in online_nodes))
3021 def BuildHooksEnv(self):
3024 Cluster-Verify hooks just ran in the post phase and their failure makes
3025 the output be logged in the verify output and the verification to fail.
3029 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3032 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3033 for node in self.my_node_info.values())
3037 def BuildHooksNodes(self):
3038 """Build hooks nodes.
3041 return ([], self.my_node_names)
3043 def Exec(self, feedback_fn):
3044 """Verify integrity of the node group, performing various test on nodes.
3047 # This method has too many local variables. pylint: disable=R0914
3048 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3050 if not self.my_node_names:
3052 feedback_fn("* Empty node group, skipping verification")
3056 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3057 verbose = self.op.verbose
3058 self._feedback_fn = feedback_fn
3060 vg_name = self.cfg.GetVGName()
3061 drbd_helper = self.cfg.GetDRBDHelper()
3062 cluster = self.cfg.GetClusterInfo()
3063 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3064 hypervisors = cluster.enabled_hypervisors
3065 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3067 i_non_redundant = [] # Non redundant instances
3068 i_non_a_balanced = [] # Non auto-balanced instances
3069 i_offline = 0 # Count of offline instances
3070 n_offline = 0 # Count of offline nodes
3071 n_drained = 0 # Count of nodes being drained
3072 node_vol_should = {}
3074 # FIXME: verify OS list
3077 filemap = _ComputeAncillaryFiles(cluster, False)
3079 # do local checksums
3080 master_node = self.master_node = self.cfg.GetMasterNode()
3081 master_ip = self.cfg.GetMasterIP()
3083 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3086 if self.cfg.GetUseExternalMipScript():
3087 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3089 node_verify_param = {
3090 constants.NV_FILELIST:
3091 utils.UniqueSequence(filename
3092 for files in filemap
3093 for filename in files),
3094 constants.NV_NODELIST:
3095 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3096 self.all_node_info.values()),
3097 constants.NV_HYPERVISOR: hypervisors,
3098 constants.NV_HVPARAMS:
3099 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3100 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3101 for node in node_data_list
3102 if not node.offline],
3103 constants.NV_INSTANCELIST: hypervisors,
3104 constants.NV_VERSION: None,
3105 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3106 constants.NV_NODESETUP: None,
3107 constants.NV_TIME: None,
3108 constants.NV_MASTERIP: (master_node, master_ip),
3109 constants.NV_OSLIST: None,
3110 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3111 constants.NV_USERSCRIPTS: user_scripts,
3114 if vg_name is not None:
3115 node_verify_param[constants.NV_VGLIST] = None
3116 node_verify_param[constants.NV_LVLIST] = vg_name
3117 node_verify_param[constants.NV_PVLIST] = [vg_name]
3118 node_verify_param[constants.NV_DRBDLIST] = None
3121 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3124 # FIXME: this needs to be changed per node-group, not cluster-wide
3126 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3127 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3128 bridges.add(default_nicpp[constants.NIC_LINK])
3129 for instance in self.my_inst_info.values():
3130 for nic in instance.nics:
3131 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3132 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3133 bridges.add(full_nic[constants.NIC_LINK])
3136 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3138 # Build our expected cluster state
3139 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3141 vm_capable=node.vm_capable))
3142 for node in node_data_list)
3146 for node in self.all_node_info.values():
3147 path = _SupportsOob(self.cfg, node)
3148 if path and path not in oob_paths:
3149 oob_paths.append(path)
3152 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3154 for instance in self.my_inst_names:
3155 inst_config = self.my_inst_info[instance]
3156 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3159 for nname in inst_config.all_nodes:
3160 if nname not in node_image:
3161 gnode = self.NodeImage(name=nname)
3162 gnode.ghost = (nname not in self.all_node_info)
3163 node_image[nname] = gnode
3165 inst_config.MapLVsByNode(node_vol_should)
3167 pnode = inst_config.primary_node
3168 node_image[pnode].pinst.append(instance)
3170 for snode in inst_config.secondary_nodes:
3171 nimg = node_image[snode]
3172 nimg.sinst.append(instance)
3173 if pnode not in nimg.sbp:
3174 nimg.sbp[pnode] = []
3175 nimg.sbp[pnode].append(instance)
3177 # At this point, we have the in-memory data structures complete,
3178 # except for the runtime information, which we'll gather next
3180 # Due to the way our RPC system works, exact response times cannot be
3181 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3182 # time before and after executing the request, we can at least have a time
3184 nvinfo_starttime = time.time()
3185 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3187 self.cfg.GetClusterName())
3188 nvinfo_endtime = time.time()
3190 if self.extra_lv_nodes and vg_name is not None:
3192 self.rpc.call_node_verify(self.extra_lv_nodes,
3193 {constants.NV_LVLIST: vg_name},
3194 self.cfg.GetClusterName())
3196 extra_lv_nvinfo = {}
3198 all_drbd_map = self.cfg.ComputeDRBDMap()
3200 feedback_fn("* Gathering disk information (%s nodes)" %
3201 len(self.my_node_names))
3202 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3205 feedback_fn("* Verifying configuration file consistency")
3207 # If not all nodes are being checked, we need to make sure the master node
3208 # and a non-checked vm_capable node are in the list.
3209 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3211 vf_nvinfo = all_nvinfo.copy()
3212 vf_node_info = list(self.my_node_info.values())
3213 additional_nodes = []
3214 if master_node not in self.my_node_info:
3215 additional_nodes.append(master_node)
3216 vf_node_info.append(self.all_node_info[master_node])
3217 # Add the first vm_capable node we find which is not included
3218 for node in absent_nodes:
3219 nodeinfo = self.all_node_info[node]
3220 if nodeinfo.vm_capable and not nodeinfo.offline:
3221 additional_nodes.append(node)
3222 vf_node_info.append(self.all_node_info[node])
3224 key = constants.NV_FILELIST
3225 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3226 {key: node_verify_param[key]},
3227 self.cfg.GetClusterName()))
3229 vf_nvinfo = all_nvinfo
3230 vf_node_info = self.my_node_info.values()
3232 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3234 feedback_fn("* Verifying node status")
3238 for node_i in node_data_list:
3240 nimg = node_image[node]
3244 feedback_fn("* Skipping offline node %s" % (node,))
3248 if node == master_node:
3250 elif node_i.master_candidate:
3251 ntype = "master candidate"
3252 elif node_i.drained:
3258 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3260 msg = all_nvinfo[node].fail_msg
3261 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3264 nimg.rpc_fail = True
3267 nresult = all_nvinfo[node].payload
3269 nimg.call_ok = self._VerifyNode(node_i, nresult)
3270 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3271 self._VerifyNodeNetwork(node_i, nresult)
3272 self._VerifyNodeUserScripts(node_i, nresult)
3273 self._VerifyOob(node_i, nresult)
3276 self._VerifyNodeLVM(node_i, nresult, vg_name)
3277 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3280 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3281 self._UpdateNodeInstances(node_i, nresult, nimg)
3282 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3283 self._UpdateNodeOS(node_i, nresult, nimg)
3285 if not nimg.os_fail:
3286 if refos_img is None:
3288 self._VerifyNodeOS(node_i, nimg, refos_img)
3289 self._VerifyNodeBridges(node_i, nresult, bridges)
3291 # Check whether all running instancies are primary for the node. (This
3292 # can no longer be done from _VerifyInstance below, since some of the
3293 # wrong instances could be from other node groups.)
3294 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3296 for inst in non_primary_inst:
3297 test = inst in self.all_inst_info
3298 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3299 "instance should not run on node %s", node_i.name)
3300 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3301 "node is running unknown instance %s", inst)
3303 for node, result in extra_lv_nvinfo.items():
3304 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3305 node_image[node], vg_name)
3307 feedback_fn("* Verifying instance status")
3308 for instance in self.my_inst_names:
3310 feedback_fn("* Verifying instance %s" % instance)
3311 inst_config = self.my_inst_info[instance]
3312 self._VerifyInstance(instance, inst_config, node_image,
3314 inst_nodes_offline = []
3316 pnode = inst_config.primary_node
3317 pnode_img = node_image[pnode]
3318 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3319 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3320 " primary node failed", instance)
3322 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3324 constants.CV_EINSTANCEBADNODE, instance,
3325 "instance is marked as running and lives on offline node %s",
3326 inst_config.primary_node)
3328 # If the instance is non-redundant we cannot survive losing its primary
3329 # node, so we are not N+1 compliant. On the other hand we have no disk
3330 # templates with more than one secondary so that situation is not well
3332 # FIXME: does not support file-backed instances
3333 if not inst_config.secondary_nodes:
3334 i_non_redundant.append(instance)
3336 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3337 constants.CV_EINSTANCELAYOUT,
3338 instance, "instance has multiple secondary nodes: %s",
3339 utils.CommaJoin(inst_config.secondary_nodes),
3340 code=self.ETYPE_WARNING)
3342 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3343 pnode = inst_config.primary_node
3344 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3345 instance_groups = {}
3347 for node in instance_nodes:
3348 instance_groups.setdefault(self.all_node_info[node].group,
3352 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3353 # Sort so that we always list the primary node first.
3354 for group, nodes in sorted(instance_groups.items(),
3355 key=lambda (_, nodes): pnode in nodes,
3358 self._ErrorIf(len(instance_groups) > 1,
3359 constants.CV_EINSTANCESPLITGROUPS,
3360 instance, "instance has primary and secondary nodes in"
3361 " different groups: %s", utils.CommaJoin(pretty_list),
3362 code=self.ETYPE_WARNING)
3364 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3365 i_non_a_balanced.append(instance)
3367 for snode in inst_config.secondary_nodes:
3368 s_img = node_image[snode]
3369 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3370 snode, "instance %s, connection to secondary node failed",
3374 inst_nodes_offline.append(snode)
3376 # warn that the instance lives on offline nodes
3377 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3378 "instance has offline secondary node(s) %s",
3379 utils.CommaJoin(inst_nodes_offline))
3380 # ... or ghost/non-vm_capable nodes
3381 for node in inst_config.all_nodes:
3382 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3383 instance, "instance lives on ghost node %s", node)
3384 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3385 instance, "instance lives on non-vm_capable node %s", node)
3387 feedback_fn("* Verifying orphan volumes")
3388 reserved = utils.FieldSet(*cluster.reserved_lvs)
3390 # We will get spurious "unknown volume" warnings if any node of this group
3391 # is secondary for an instance whose primary is in another group. To avoid
3392 # them, we find these instances and add their volumes to node_vol_should.
3393 for inst in self.all_inst_info.values():
3394 for secondary in inst.secondary_nodes:
3395 if (secondary in self.my_node_info
3396 and inst.name not in self.my_inst_info):
3397 inst.MapLVsByNode(node_vol_should)
3400 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3402 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3403 feedback_fn("* Verifying N+1 Memory redundancy")
3404 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3406 feedback_fn("* Other Notes")
3408 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3409 % len(i_non_redundant))
3411 if i_non_a_balanced:
3412 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3413 % len(i_non_a_balanced))
3416 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3419 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3422 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3426 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3427 """Analyze the post-hooks' result
3429 This method analyses the hook result, handles it, and sends some
3430 nicely-formatted feedback back to the user.
3432 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3433 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3434 @param hooks_results: the results of the multi-node hooks rpc call
3435 @param feedback_fn: function used send feedback back to the caller
3436 @param lu_result: previous Exec result
3437 @return: the new Exec result, based on the previous result
3441 # We only really run POST phase hooks, only for non-empty groups,
3442 # and are only interested in their results
3443 if not self.my_node_names:
3446 elif phase == constants.HOOKS_PHASE_POST:
3447 # Used to change hooks' output to proper indentation
3448 feedback_fn("* Hooks Results")
3449 assert hooks_results, "invalid result from hooks"
3451 for node_name in hooks_results:
3452 res = hooks_results[node_name]
3454 test = msg and not res.offline
3455 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3456 "Communication failure in hooks execution: %s", msg)
3457 if res.offline or msg:
3458 # No need to investigate payload if node is offline or gave
3461 for script, hkr, output in res.payload:
3462 test = hkr == constants.HKR_FAIL
3463 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3464 "Script %s failed, output:", script)
3466 output = self._HOOKS_INDENT_RE.sub(" ", output)
3467 feedback_fn("%s" % output)
3473 class LUClusterVerifyDisks(NoHooksLU):
3474 """Verifies the cluster disks status.
3479 def ExpandNames(self):
3480 self.share_locks = _ShareAll()
3481 self.needed_locks = {
3482 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3485 def Exec(self, feedback_fn):
3486 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3488 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3489 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3490 for group in group_names])
3493 class LUGroupVerifyDisks(NoHooksLU):
3494 """Verifies the status of all disks in a node group.
3499 def ExpandNames(self):
3500 # Raises errors.OpPrereqError on its own if group can't be found
3501 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3503 self.share_locks = _ShareAll()
3504 self.needed_locks = {
3505 locking.LEVEL_INSTANCE: [],
3506 locking.LEVEL_NODEGROUP: [],
3507 locking.LEVEL_NODE: [],
3510 def DeclareLocks(self, level):
3511 if level == locking.LEVEL_INSTANCE:
3512 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3514 # Lock instances optimistically, needs verification once node and group
3515 # locks have been acquired
3516 self.needed_locks[locking.LEVEL_INSTANCE] = \
3517 self.cfg.GetNodeGroupInstances(self.group_uuid)
3519 elif level == locking.LEVEL_NODEGROUP:
3520 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3522 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3523 set([self.group_uuid] +
3524 # Lock all groups used by instances optimistically; this requires
3525 # going via the node before it's locked, requiring verification
3528 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3529 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3531 elif level == locking.LEVEL_NODE:
3532 # This will only lock the nodes in the group to be verified which contain
3534 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3535 self._LockInstancesNodes()
3537 # Lock all nodes in group to be verified
3538 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3539 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3540 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3542 def CheckPrereq(self):
3543 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3544 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3545 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3547 assert self.group_uuid in owned_groups
3549 # Check if locked instances are still correct
3550 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3552 # Get instance information
3553 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3555 # Check if node groups for locked instances are still correct
3556 _CheckInstancesNodeGroups(self.cfg, self.instances,
3557 owned_groups, owned_nodes, self.group_uuid)
3559 def Exec(self, feedback_fn):
3560 """Verify integrity of cluster disks.
3562 @rtype: tuple of three items
3563 @return: a tuple of (dict of node-to-node_error, list of instances
3564 which need activate-disks, dict of instance: (node, volume) for
3569 res_instances = set()
3572 nv_dict = _MapInstanceDisksToNodes([inst
3573 for inst in self.instances.values()
3574 if inst.admin_state == constants.ADMINST_UP])
3577 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3578 set(self.cfg.GetVmCapableNodeList()))
3580 node_lvs = self.rpc.call_lv_list(nodes, [])
3582 for (node, node_res) in node_lvs.items():
3583 if node_res.offline:
3586 msg = node_res.fail_msg
3588 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3589 res_nodes[node] = msg
3592 for lv_name, (_, _, lv_online) in node_res.payload.items():
3593 inst = nv_dict.pop((node, lv_name), None)
3594 if not (lv_online or inst is None):
3595 res_instances.add(inst)
3597 # any leftover items in nv_dict are missing LVs, let's arrange the data
3599 for key, inst in nv_dict.iteritems():
3600 res_missing.setdefault(inst, []).append(list(key))
3602 return (res_nodes, list(res_instances), res_missing)
3605 class LUClusterRepairDiskSizes(NoHooksLU):
3606 """Verifies the cluster disks sizes.
3611 def ExpandNames(self):
3612 if self.op.instances:
3613 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3614 self.needed_locks = {
3615 locking.LEVEL_NODE_RES: [],
3616 locking.LEVEL_INSTANCE: self.wanted_names,
3618 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3620 self.wanted_names = None
3621 self.needed_locks = {
3622 locking.LEVEL_NODE_RES: locking.ALL_SET,
3623 locking.LEVEL_INSTANCE: locking.ALL_SET,
3625 self.share_locks = {
3626 locking.LEVEL_NODE_RES: 1,
3627 locking.LEVEL_INSTANCE: 0,
3630 def DeclareLocks(self, level):
3631 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3632 self._LockInstancesNodes(primary_only=True, level=level)
3634 def CheckPrereq(self):
3635 """Check prerequisites.
3637 This only checks the optional instance list against the existing names.
3640 if self.wanted_names is None:
3641 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3643 self.wanted_instances = \
3644 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3646 def _EnsureChildSizes(self, disk):
3647 """Ensure children of the disk have the needed disk size.
3649 This is valid mainly for DRBD8 and fixes an issue where the
3650 children have smaller disk size.
3652 @param disk: an L{ganeti.objects.Disk} object
3655 if disk.dev_type == constants.LD_DRBD8:
3656 assert disk.children, "Empty children for DRBD8?"
3657 fchild = disk.children[0]
3658 mismatch = fchild.size < disk.size
3660 self.LogInfo("Child disk has size %d, parent %d, fixing",
3661 fchild.size, disk.size)
3662 fchild.size = disk.size
3664 # and we recurse on this child only, not on the metadev
3665 return self._EnsureChildSizes(fchild) or mismatch
3669 def Exec(self, feedback_fn):
3670 """Verify the size of cluster disks.
3673 # TODO: check child disks too
3674 # TODO: check differences in size between primary/secondary nodes
3676 for instance in self.wanted_instances:
3677 pnode = instance.primary_node
3678 if pnode not in per_node_disks:
3679 per_node_disks[pnode] = []
3680 for idx, disk in enumerate(instance.disks):
3681 per_node_disks[pnode].append((instance, idx, disk))
3683 assert not (frozenset(per_node_disks.keys()) -
3684 self.owned_locks(locking.LEVEL_NODE_RES)), \
3685 "Not owning correct locks"
3686 assert not self.owned_locks(locking.LEVEL_NODE)
3689 for node, dskl in per_node_disks.items():
3690 newl = [v[2].Copy() for v in dskl]
3692 self.cfg.SetDiskID(dsk, node)
3693 result = self.rpc.call_blockdev_getsize(node, newl)
3695 self.LogWarning("Failure in blockdev_getsize call to node"
3696 " %s, ignoring", node)
3698 if len(result.payload) != len(dskl):
3699 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3700 " result.payload=%s", node, len(dskl), result.payload)
3701 self.LogWarning("Invalid result from node %s, ignoring node results",
3704 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3706 self.LogWarning("Disk %d of instance %s did not return size"
3707 " information, ignoring", idx, instance.name)
3709 if not isinstance(size, (int, long)):
3710 self.LogWarning("Disk %d of instance %s did not return valid"
3711 " size information, ignoring", idx, instance.name)
3714 if size != disk.size:
3715 self.LogInfo("Disk %d of instance %s has mismatched size,"
3716 " correcting: recorded %d, actual %d", idx,
3717 instance.name, disk.size, size)
3719 self.cfg.Update(instance, feedback_fn)
3720 changed.append((instance.name, idx, size))
3721 if self._EnsureChildSizes(disk):
3722 self.cfg.Update(instance, feedback_fn)
3723 changed.append((instance.name, idx, disk.size))
3727 class LUClusterRename(LogicalUnit):
3728 """Rename the cluster.
3731 HPATH = "cluster-rename"
3732 HTYPE = constants.HTYPE_CLUSTER
3734 def BuildHooksEnv(self):
3739 "OP_TARGET": self.cfg.GetClusterName(),
3740 "NEW_NAME": self.op.name,
3743 def BuildHooksNodes(self):
3744 """Build hooks nodes.
3747 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3749 def CheckPrereq(self):
3750 """Verify that the passed name is a valid one.
3753 hostname = netutils.GetHostname(name=self.op.name,
3754 family=self.cfg.GetPrimaryIPFamily())
3756 new_name = hostname.name
3757 self.ip = new_ip = hostname.ip
3758 old_name = self.cfg.GetClusterName()
3759 old_ip = self.cfg.GetMasterIP()
3760 if new_name == old_name and new_ip == old_ip:
3761 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3762 " cluster has changed",
3764 if new_ip != old_ip:
3765 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3766 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3767 " reachable on the network" %
3768 new_ip, errors.ECODE_NOTUNIQUE)
3770 self.op.name = new_name
3772 def Exec(self, feedback_fn):
3773 """Rename the cluster.
3776 clustername = self.op.name
3779 # shutdown the master IP
3780 master_params = self.cfg.GetMasterNetworkParameters()
3781 ems = self.cfg.GetUseExternalMipScript()
3782 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3784 result.Raise("Could not disable the master role")
3787 cluster = self.cfg.GetClusterInfo()
3788 cluster.cluster_name = clustername
3789 cluster.master_ip = new_ip
3790 self.cfg.Update(cluster, feedback_fn)
3792 # update the known hosts file
3793 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3794 node_list = self.cfg.GetOnlineNodeList()
3796 node_list.remove(master_params.name)
3799 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3801 master_params.ip = new_ip
3802 result = self.rpc.call_node_activate_master_ip(master_params.name,
3804 msg = result.fail_msg
3806 self.LogWarning("Could not re-enable the master role on"
3807 " the master, please restart manually: %s", msg)
3812 def _ValidateNetmask(cfg, netmask):
3813 """Checks if a netmask is valid.
3815 @type cfg: L{config.ConfigWriter}
3816 @param cfg: The cluster configuration
3818 @param netmask: the netmask to be verified
3819 @raise errors.OpPrereqError: if the validation fails
3822 ip_family = cfg.GetPrimaryIPFamily()
3824 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3825 except errors.ProgrammerError:
3826 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3828 if not ipcls.ValidateNetmask(netmask):
3829 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3833 class LUClusterSetParams(LogicalUnit):
3834 """Change the parameters of the cluster.
3837 HPATH = "cluster-modify"
3838 HTYPE = constants.HTYPE_CLUSTER
3841 def CheckArguments(self):
3845 if self.op.uid_pool:
3846 uidpool.CheckUidPool(self.op.uid_pool)
3848 if self.op.add_uids:
3849 uidpool.CheckUidPool(self.op.add_uids)
3851 if self.op.remove_uids:
3852 uidpool.CheckUidPool(self.op.remove_uids)
3854 if self.op.master_netmask is not None:
3855 _ValidateNetmask(self.cfg, self.op.master_netmask)
3857 if self.op.diskparams:
3858 for dt_params in self.op.diskparams.values():
3859 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3861 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3862 except errors.OpPrereqError, err:
3863 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3866 def ExpandNames(self):
3867 # FIXME: in the future maybe other cluster params won't require checking on
3868 # all nodes to be modified.
3869 self.needed_locks = {
3870 locking.LEVEL_NODE: locking.ALL_SET,
3871 locking.LEVEL_INSTANCE: locking.ALL_SET,
3872 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3874 self.share_locks = {
3875 locking.LEVEL_NODE: 1,
3876 locking.LEVEL_INSTANCE: 1,
3877 locking.LEVEL_NODEGROUP: 1,
3880 def BuildHooksEnv(self):
3885 "OP_TARGET": self.cfg.GetClusterName(),
3886 "NEW_VG_NAME": self.op.vg_name,
3889 def BuildHooksNodes(self):
3890 """Build hooks nodes.
3893 mn = self.cfg.GetMasterNode()
3896 def CheckPrereq(self):
3897 """Check prerequisites.
3899 This checks whether the given params don't conflict and
3900 if the given volume group is valid.
3903 if self.op.vg_name is not None and not self.op.vg_name:
3904 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3905 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3906 " instances exist", errors.ECODE_INVAL)
3908 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3909 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3910 raise errors.OpPrereqError("Cannot disable drbd helper while"
3911 " drbd-based instances exist",
3914 node_list = self.owned_locks(locking.LEVEL_NODE)
3916 # if vg_name not None, checks given volume group on all nodes
3918 vglist = self.rpc.call_vg_list(node_list)
3919 for node in node_list:
3920 msg = vglist[node].fail_msg
3922 # ignoring down node
3923 self.LogWarning("Error while gathering data on node %s"
3924 " (ignoring node): %s", node, msg)
3926 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3928 constants.MIN_VG_SIZE)
3930 raise errors.OpPrereqError("Error on node '%s': %s" %
3931 (node, vgstatus), errors.ECODE_ENVIRON)
3933 if self.op.drbd_helper:
3934 # checks given drbd helper on all nodes
3935 helpers = self.rpc.call_drbd_helper(node_list)
3936 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3938 self.LogInfo("Not checking drbd helper on offline node %s", node)
3940 msg = helpers[node].fail_msg
3942 raise errors.OpPrereqError("Error checking drbd helper on node"
3943 " '%s': %s" % (node, msg),
3944 errors.ECODE_ENVIRON)
3945 node_helper = helpers[node].payload
3946 if node_helper != self.op.drbd_helper:
3947 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3948 (node, node_helper), errors.ECODE_ENVIRON)
3950 self.cluster = cluster = self.cfg.GetClusterInfo()
3951 # validate params changes
3952 if self.op.beparams:
3953 objects.UpgradeBeParams(self.op.beparams)
3954 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3955 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3957 if self.op.ndparams:
3958 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3959 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3961 # TODO: we need a more general way to handle resetting
3962 # cluster-level parameters to default values
3963 if self.new_ndparams["oob_program"] == "":
3964 self.new_ndparams["oob_program"] = \
3965 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3967 if self.op.hv_state:
3968 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3969 self.cluster.hv_state_static)
3970 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3971 for hv, values in new_hv_state.items())
3973 if self.op.disk_state:
3974 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3975 self.cluster.disk_state_static)
3976 self.new_disk_state = \
3977 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3978 for name, values in svalues.items()))
3979 for storage, svalues in new_disk_state.items())
3982 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3985 all_instances = self.cfg.GetAllInstancesInfo().values()
3987 for group in self.cfg.GetAllNodeGroupsInfo().values():
3988 instances = frozenset([inst for inst in all_instances
3989 if compat.any(node in group.members
3990 for node in inst.all_nodes)])
3991 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3992 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3994 new_ipolicy, instances)
3996 violations.update(new)
3999 self.LogWarning("After the ipolicy change the following instances"
4000 " violate them: %s",
4001 utils.CommaJoin(utils.NiceSort(violations)))
4003 if self.op.nicparams:
4004 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4005 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4006 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4009 # check all instances for consistency
4010 for instance in self.cfg.GetAllInstancesInfo().values():
4011 for nic_idx, nic in enumerate(instance.nics):
4012 params_copy = copy.deepcopy(nic.nicparams)
4013 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4015 # check parameter syntax
4017 objects.NIC.CheckParameterSyntax(params_filled)
4018 except errors.ConfigurationError, err:
4019 nic_errors.append("Instance %s, nic/%d: %s" %
4020 (instance.name, nic_idx, err))
4022 # if we're moving instances to routed, check that they have an ip
4023 target_mode = params_filled[constants.NIC_MODE]
4024 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4025 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4026 " address" % (instance.name, nic_idx))
4028 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4029 "\n".join(nic_errors))
4031 # hypervisor list/parameters
4032 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4033 if self.op.hvparams:
4034 for hv_name, hv_dict in self.op.hvparams.items():
4035 if hv_name not in self.new_hvparams:
4036 self.new_hvparams[hv_name] = hv_dict
4038 self.new_hvparams[hv_name].update(hv_dict)
4040 # disk template parameters
4041 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4042 if self.op.diskparams:
4043 for dt_name, dt_params in self.op.diskparams.items():
4044 if dt_name not in self.op.diskparams:
4045 self.new_diskparams[dt_name] = dt_params
4047 self.new_diskparams[dt_name].update(dt_params)
4049 # os hypervisor parameters
4050 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4052 for os_name, hvs in self.op.os_hvp.items():
4053 if os_name not in self.new_os_hvp:
4054 self.new_os_hvp[os_name] = hvs
4056 for hv_name, hv_dict in hvs.items():
4057 if hv_name not in self.new_os_hvp[os_name]:
4058 self.new_os_hvp[os_name][hv_name] = hv_dict
4060 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4063 self.new_osp = objects.FillDict(cluster.osparams, {})
4064 if self.op.osparams:
4065 for os_name, osp in self.op.osparams.items():
4066 if os_name not in self.new_osp:
4067 self.new_osp[os_name] = {}
4069 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4072 if not self.new_osp[os_name]:
4073 # we removed all parameters
4074 del self.new_osp[os_name]
4076 # check the parameter validity (remote check)
4077 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4078 os_name, self.new_osp[os_name])
4080 # changes to the hypervisor list
4081 if self.op.enabled_hypervisors is not None:
4082 self.hv_list = self.op.enabled_hypervisors
4083 for hv in self.hv_list:
4084 # if the hypervisor doesn't already exist in the cluster
4085 # hvparams, we initialize it to empty, and then (in both
4086 # cases) we make sure to fill the defaults, as we might not
4087 # have a complete defaults list if the hypervisor wasn't
4089 if hv not in new_hvp:
4091 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4092 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4094 self.hv_list = cluster.enabled_hypervisors
4096 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4097 # either the enabled list has changed, or the parameters have, validate
4098 for hv_name, hv_params in self.new_hvparams.items():
4099 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4100 (self.op.enabled_hypervisors and
4101 hv_name in self.op.enabled_hypervisors)):
4102 # either this is a new hypervisor, or its parameters have changed
4103 hv_class = hypervisor.GetHypervisor(hv_name)
4104 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4105 hv_class.CheckParameterSyntax(hv_params)
4106 _CheckHVParams(self, node_list, hv_name, hv_params)
4109 # no need to check any newly-enabled hypervisors, since the
4110 # defaults have already been checked in the above code-block
4111 for os_name, os_hvp in self.new_os_hvp.items():
4112 for hv_name, hv_params in os_hvp.items():
4113 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4114 # we need to fill in the new os_hvp on top of the actual hv_p
4115 cluster_defaults = self.new_hvparams.get(hv_name, {})
4116 new_osp = objects.FillDict(cluster_defaults, hv_params)
4117 hv_class = hypervisor.GetHypervisor(hv_name)
4118 hv_class.CheckParameterSyntax(new_osp)
4119 _CheckHVParams(self, node_list, hv_name, new_osp)
4121 if self.op.default_iallocator:
4122 alloc_script = utils.FindFile(self.op.default_iallocator,
4123 constants.IALLOCATOR_SEARCH_PATH,
4125 if alloc_script is None:
4126 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4127 " specified" % self.op.default_iallocator,
4130 def Exec(self, feedback_fn):
4131 """Change the parameters of the cluster.
4134 if self.op.vg_name is not None:
4135 new_volume = self.op.vg_name
4138 if new_volume != self.cfg.GetVGName():
4139 self.cfg.SetVGName(new_volume)
4141 feedback_fn("Cluster LVM configuration already in desired"
4142 " state, not changing")
4143 if self.op.drbd_helper is not None:
4144 new_helper = self.op.drbd_helper
4147 if new_helper != self.cfg.GetDRBDHelper():
4148 self.cfg.SetDRBDHelper(new_helper)
4150 feedback_fn("Cluster DRBD helper already in desired state,"
4152 if self.op.hvparams:
4153 self.cluster.hvparams = self.new_hvparams
4155 self.cluster.os_hvp = self.new_os_hvp
4156 if self.op.enabled_hypervisors is not None:
4157 self.cluster.hvparams = self.new_hvparams
4158 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4159 if self.op.beparams:
4160 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4161 if self.op.nicparams:
4162 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4164 self.cluster.ipolicy = self.new_ipolicy
4165 if self.op.osparams:
4166 self.cluster.osparams = self.new_osp
4167 if self.op.ndparams:
4168 self.cluster.ndparams = self.new_ndparams
4169 if self.op.diskparams:
4170 self.cluster.diskparams = self.new_diskparams
4171 if self.op.hv_state:
4172 self.cluster.hv_state_static = self.new_hv_state
4173 if self.op.disk_state:
4174 self.cluster.disk_state_static = self.new_disk_state
4176 if self.op.candidate_pool_size is not None:
4177 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4178 # we need to update the pool size here, otherwise the save will fail
4179 _AdjustCandidatePool(self, [])
4181 if self.op.maintain_node_health is not None:
4182 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4183 feedback_fn("Note: CONFD was disabled at build time, node health"
4184 " maintenance is not useful (still enabling it)")
4185 self.cluster.maintain_node_health = self.op.maintain_node_health
4187 if self.op.prealloc_wipe_disks is not None:
4188 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4190 if self.op.add_uids is not None:
4191 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4193 if self.op.remove_uids is not None:
4194 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4196 if self.op.uid_pool is not None:
4197 self.cluster.uid_pool = self.op.uid_pool
4199 if self.op.default_iallocator is not None:
4200 self.cluster.default_iallocator = self.op.default_iallocator
4202 if self.op.reserved_lvs is not None:
4203 self.cluster.reserved_lvs = self.op.reserved_lvs
4205 if self.op.use_external_mip_script is not None:
4206 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4208 def helper_os(aname, mods, desc):
4210 lst = getattr(self.cluster, aname)
4211 for key, val in mods:
4212 if key == constants.DDM_ADD:
4214 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4217 elif key == constants.DDM_REMOVE:
4221 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4223 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4225 if self.op.hidden_os:
4226 helper_os("hidden_os", self.op.hidden_os, "hidden")
4228 if self.op.blacklisted_os:
4229 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4231 if self.op.master_netdev:
4232 master_params = self.cfg.GetMasterNetworkParameters()
4233 ems = self.cfg.GetUseExternalMipScript()
4234 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4235 self.cluster.master_netdev)
4236 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4238 result.Raise("Could not disable the master ip")
4239 feedback_fn("Changing master_netdev from %s to %s" %
4240 (master_params.netdev, self.op.master_netdev))
4241 self.cluster.master_netdev = self.op.master_netdev
4243 if self.op.master_netmask:
4244 master_params = self.cfg.GetMasterNetworkParameters()
4245 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4246 result = self.rpc.call_node_change_master_netmask(master_params.name,
4247 master_params.netmask,
4248 self.op.master_netmask,
4250 master_params.netdev)
4252 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4255 self.cluster.master_netmask = self.op.master_netmask
4257 self.cfg.Update(self.cluster, feedback_fn)
4259 if self.op.master_netdev:
4260 master_params = self.cfg.GetMasterNetworkParameters()
4261 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4262 self.op.master_netdev)
4263 ems = self.cfg.GetUseExternalMipScript()
4264 result = self.rpc.call_node_activate_master_ip(master_params.name,
4267 self.LogWarning("Could not re-enable the master ip on"
4268 " the master, please restart manually: %s",
4272 def _UploadHelper(lu, nodes, fname):
4273 """Helper for uploading a file and showing warnings.
4276 if os.path.exists(fname):
4277 result = lu.rpc.call_upload_file(nodes, fname)
4278 for to_node, to_result in result.items():
4279 msg = to_result.fail_msg
4281 msg = ("Copy of file %s to node %s failed: %s" %
4282 (fname, to_node, msg))
4283 lu.proc.LogWarning(msg)
4286 def _ComputeAncillaryFiles(cluster, redist):
4287 """Compute files external to Ganeti which need to be consistent.
4289 @type redist: boolean
4290 @param redist: Whether to include files which need to be redistributed
4293 # Compute files for all nodes
4295 constants.SSH_KNOWN_HOSTS_FILE,
4296 constants.CONFD_HMAC_KEY,
4297 constants.CLUSTER_DOMAIN_SECRET_FILE,
4298 constants.SPICE_CERT_FILE,
4299 constants.SPICE_CACERT_FILE,
4300 constants.RAPI_USERS_FILE,
4304 files_all.update(constants.ALL_CERT_FILES)
4305 files_all.update(ssconf.SimpleStore().GetFileList())
4307 # we need to ship at least the RAPI certificate
4308 files_all.add(constants.RAPI_CERT_FILE)
4310 if cluster.modify_etc_hosts:
4311 files_all.add(constants.ETC_HOSTS)
4313 if cluster.use_external_mip_script:
4314 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4316 # Files which are optional, these must:
4317 # - be present in one other category as well
4318 # - either exist or not exist on all nodes of that category (mc, vm all)
4320 constants.RAPI_USERS_FILE,
4323 # Files which should only be on master candidates
4327 files_mc.add(constants.CLUSTER_CONF_FILE)
4329 # Files which should only be on VM-capable nodes
4330 files_vm = set(filename
4331 for hv_name in cluster.enabled_hypervisors
4332 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4334 files_opt |= set(filename
4335 for hv_name in cluster.enabled_hypervisors
4336 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4338 # Filenames in each category must be unique
4339 all_files_set = files_all | files_mc | files_vm
4340 assert (len(all_files_set) ==
4341 sum(map(len, [files_all, files_mc, files_vm]))), \
4342 "Found file listed in more than one file list"
4344 # Optional files must be present in one other category
4345 assert all_files_set.issuperset(files_opt), \
4346 "Optional file not in a different required list"
4348 return (files_all, files_opt, files_mc, files_vm)
4351 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4352 """Distribute additional files which are part of the cluster configuration.
4354 ConfigWriter takes care of distributing the config and ssconf files, but
4355 there are more files which should be distributed to all nodes. This function
4356 makes sure those are copied.
4358 @param lu: calling logical unit
4359 @param additional_nodes: list of nodes not in the config to distribute to
4360 @type additional_vm: boolean
4361 @param additional_vm: whether the additional nodes are vm-capable or not
4364 # Gather target nodes
4365 cluster = lu.cfg.GetClusterInfo()
4366 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4368 online_nodes = lu.cfg.GetOnlineNodeList()
4369 online_set = frozenset(online_nodes)
4370 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4372 if additional_nodes is not None:
4373 online_nodes.extend(additional_nodes)
4375 vm_nodes.extend(additional_nodes)
4377 # Never distribute to master node
4378 for nodelist in [online_nodes, vm_nodes]:
4379 if master_info.name in nodelist:
4380 nodelist.remove(master_info.name)
4383 (files_all, _, files_mc, files_vm) = \
4384 _ComputeAncillaryFiles(cluster, True)
4386 # Never re-distribute configuration file from here
4387 assert not (constants.CLUSTER_CONF_FILE in files_all or
4388 constants.CLUSTER_CONF_FILE in files_vm)
4389 assert not files_mc, "Master candidates not handled in this function"
4392 (online_nodes, files_all),
4393 (vm_nodes, files_vm),
4397 for (node_list, files) in filemap:
4399 _UploadHelper(lu, node_list, fname)
4402 class LUClusterRedistConf(NoHooksLU):
4403 """Force the redistribution of cluster configuration.
4405 This is a very simple LU.
4410 def ExpandNames(self):
4411 self.needed_locks = {
4412 locking.LEVEL_NODE: locking.ALL_SET,
4414 self.share_locks[locking.LEVEL_NODE] = 1
4416 def Exec(self, feedback_fn):
4417 """Redistribute the configuration.
4420 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4421 _RedistributeAncillaryFiles(self)
4424 class LUClusterActivateMasterIp(NoHooksLU):
4425 """Activate the master IP on the master node.
4428 def Exec(self, feedback_fn):
4429 """Activate the master IP.
4432 master_params = self.cfg.GetMasterNetworkParameters()
4433 ems = self.cfg.GetUseExternalMipScript()
4434 result = self.rpc.call_node_activate_master_ip(master_params.name,
4436 result.Raise("Could not activate the master IP")
4439 class LUClusterDeactivateMasterIp(NoHooksLU):
4440 """Deactivate the master IP on the master node.
4443 def Exec(self, feedback_fn):
4444 """Deactivate the master IP.
4447 master_params = self.cfg.GetMasterNetworkParameters()
4448 ems = self.cfg.GetUseExternalMipScript()
4449 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4451 result.Raise("Could not deactivate the master IP")
4454 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4455 """Sleep and poll for an instance's disk to sync.
4458 if not instance.disks or disks is not None and not disks:
4461 disks = _ExpandCheckDisks(instance, disks)
4464 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4466 node = instance.primary_node
4469 lu.cfg.SetDiskID(dev, node)
4471 # TODO: Convert to utils.Retry
4474 degr_retries = 10 # in seconds, as we sleep 1 second each time
4478 cumul_degraded = False
4479 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4480 msg = rstats.fail_msg
4482 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4485 raise errors.RemoteError("Can't contact node %s for mirror data,"
4486 " aborting." % node)
4489 rstats = rstats.payload
4491 for i, mstat in enumerate(rstats):
4493 lu.LogWarning("Can't compute data for node %s/%s",
4494 node, disks[i].iv_name)
4497 cumul_degraded = (cumul_degraded or
4498 (mstat.is_degraded and mstat.sync_percent is None))
4499 if mstat.sync_percent is not None:
4501 if mstat.estimated_time is not None:
4502 rem_time = ("%s remaining (estimated)" %
4503 utils.FormatSeconds(mstat.estimated_time))
4504 max_time = mstat.estimated_time
4506 rem_time = "no time estimate"
4507 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4508 (disks[i].iv_name, mstat.sync_percent, rem_time))
4510 # if we're done but degraded, let's do a few small retries, to
4511 # make sure we see a stable and not transient situation; therefore
4512 # we force restart of the loop
4513 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4514 logging.info("Degraded disks found, %d retries left", degr_retries)
4522 time.sleep(min(60, max_time))
4525 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4526 return not cumul_degraded
4529 def _BlockdevFind(lu, node, dev, instance):
4530 """Wrapper around call_blockdev_find to annotate diskparams.
4532 @param lu: A reference to the lu object
4533 @param node: The node to call out
4534 @param dev: The device to find
4535 @param instance: The instance object the device belongs to
4536 @returns The result of the rpc call
4539 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4540 return lu.rpc.call_blockdev_find(node, disk)
4543 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4544 """Wrapper around L{_CheckDiskConsistencyInner}.
4547 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4548 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4552 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4554 """Check that mirrors are not degraded.
4556 @attention: The device has to be annotated already.
4558 The ldisk parameter, if True, will change the test from the
4559 is_degraded attribute (which represents overall non-ok status for
4560 the device(s)) to the ldisk (representing the local storage status).
4563 lu.cfg.SetDiskID(dev, node)
4567 if on_primary or dev.AssembleOnSecondary():
4568 rstats = lu.rpc.call_blockdev_find(node, dev)
4569 msg = rstats.fail_msg
4571 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4573 elif not rstats.payload:
4574 lu.LogWarning("Can't find disk on node %s", node)
4578 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4580 result = result and not rstats.payload.is_degraded
4583 for child in dev.children:
4584 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4590 class LUOobCommand(NoHooksLU):
4591 """Logical unit for OOB handling.
4595 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4597 def ExpandNames(self):
4598 """Gather locks we need.
4601 if self.op.node_names:
4602 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4603 lock_names = self.op.node_names
4605 lock_names = locking.ALL_SET
4607 self.needed_locks = {
4608 locking.LEVEL_NODE: lock_names,
4611 def CheckPrereq(self):
4612 """Check prerequisites.
4615 - the node exists in the configuration
4618 Any errors are signaled by raising errors.OpPrereqError.
4622 self.master_node = self.cfg.GetMasterNode()
4624 assert self.op.power_delay >= 0.0
4626 if self.op.node_names:
4627 if (self.op.command in self._SKIP_MASTER and
4628 self.master_node in self.op.node_names):
4629 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4630 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4632 if master_oob_handler:
4633 additional_text = ("run '%s %s %s' if you want to operate on the"
4634 " master regardless") % (master_oob_handler,
4638 additional_text = "it does not support out-of-band operations"
4640 raise errors.OpPrereqError(("Operating on the master node %s is not"
4641 " allowed for %s; %s") %
4642 (self.master_node, self.op.command,
4643 additional_text), errors.ECODE_INVAL)
4645 self.op.node_names = self.cfg.GetNodeList()
4646 if self.op.command in self._SKIP_MASTER:
4647 self.op.node_names.remove(self.master_node)
4649 if self.op.command in self._SKIP_MASTER:
4650 assert self.master_node not in self.op.node_names
4652 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4654 raise errors.OpPrereqError("Node %s not found" % node_name,
4657 self.nodes.append(node)
4659 if (not self.op.ignore_status and
4660 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4661 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4662 " not marked offline") % node_name,
4665 def Exec(self, feedback_fn):
4666 """Execute OOB and return result if we expect any.
4669 master_node = self.master_node
4672 for idx, node in enumerate(utils.NiceSort(self.nodes,
4673 key=lambda node: node.name)):
4674 node_entry = [(constants.RS_NORMAL, node.name)]
4675 ret.append(node_entry)
4677 oob_program = _SupportsOob(self.cfg, node)
4680 node_entry.append((constants.RS_UNAVAIL, None))
4683 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4684 self.op.command, oob_program, node.name)
4685 result = self.rpc.call_run_oob(master_node, oob_program,
4686 self.op.command, node.name,
4690 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4691 node.name, result.fail_msg)
4692 node_entry.append((constants.RS_NODATA, None))
4695 self._CheckPayload(result)
4696 except errors.OpExecError, err:
4697 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4699 node_entry.append((constants.RS_NODATA, None))
4701 if self.op.command == constants.OOB_HEALTH:
4702 # For health we should log important events
4703 for item, status in result.payload:
4704 if status in [constants.OOB_STATUS_WARNING,
4705 constants.OOB_STATUS_CRITICAL]:
4706 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4707 item, node.name, status)
4709 if self.op.command == constants.OOB_POWER_ON:
4711 elif self.op.command == constants.OOB_POWER_OFF:
4712 node.powered = False
4713 elif self.op.command == constants.OOB_POWER_STATUS:
4714 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4715 if powered != node.powered:
4716 logging.warning(("Recorded power state (%s) of node '%s' does not"
4717 " match actual power state (%s)"), node.powered,
4720 # For configuration changing commands we should update the node
4721 if self.op.command in (constants.OOB_POWER_ON,
4722 constants.OOB_POWER_OFF):
4723 self.cfg.Update(node, feedback_fn)
4725 node_entry.append((constants.RS_NORMAL, result.payload))
4727 if (self.op.command == constants.OOB_POWER_ON and
4728 idx < len(self.nodes) - 1):
4729 time.sleep(self.op.power_delay)
4733 def _CheckPayload(self, result):
4734 """Checks if the payload is valid.
4736 @param result: RPC result
4737 @raises errors.OpExecError: If payload is not valid
4741 if self.op.command == constants.OOB_HEALTH:
4742 if not isinstance(result.payload, list):
4743 errs.append("command 'health' is expected to return a list but got %s" %
4744 type(result.payload))
4746 for item, status in result.payload:
4747 if status not in constants.OOB_STATUSES:
4748 errs.append("health item '%s' has invalid status '%s'" %
4751 if self.op.command == constants.OOB_POWER_STATUS:
4752 if not isinstance(result.payload, dict):
4753 errs.append("power-status is expected to return a dict but got %s" %
4754 type(result.payload))
4756 if self.op.command in [
4757 constants.OOB_POWER_ON,
4758 constants.OOB_POWER_OFF,
4759 constants.OOB_POWER_CYCLE,
4761 if result.payload is not None:
4762 errs.append("%s is expected to not return payload but got '%s'" %
4763 (self.op.command, result.payload))
4766 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4767 utils.CommaJoin(errs))
4770 class _OsQuery(_QueryBase):
4771 FIELDS = query.OS_FIELDS
4773 def ExpandNames(self, lu):
4774 # Lock all nodes in shared mode
4775 # Temporary removal of locks, should be reverted later
4776 # TODO: reintroduce locks when they are lighter-weight
4777 lu.needed_locks = {}
4778 #self.share_locks[locking.LEVEL_NODE] = 1
4779 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4781 # The following variables interact with _QueryBase._GetNames
4783 self.wanted = self.names
4785 self.wanted = locking.ALL_SET
4787 self.do_locking = self.use_locking
4789 def DeclareLocks(self, lu, level):
4793 def _DiagnoseByOS(rlist):
4794 """Remaps a per-node return list into an a per-os per-node dictionary
4796 @param rlist: a map with node names as keys and OS objects as values
4799 @return: a dictionary with osnames as keys and as value another
4800 map, with nodes as keys and tuples of (path, status, diagnose,
4801 variants, parameters, api_versions) as values, eg::
4803 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4804 (/srv/..., False, "invalid api")],
4805 "node2": [(/srv/..., True, "", [], [])]}
4810 # we build here the list of nodes that didn't fail the RPC (at RPC
4811 # level), so that nodes with a non-responding node daemon don't
4812 # make all OSes invalid
4813 good_nodes = [node_name for node_name in rlist
4814 if not rlist[node_name].fail_msg]
4815 for node_name, nr in rlist.items():
4816 if nr.fail_msg or not nr.payload:
4818 for (name, path, status, diagnose, variants,
4819 params, api_versions) in nr.payload:
4820 if name not in all_os:
4821 # build a list of nodes for this os containing empty lists
4822 # for each node in node_list
4824 for nname in good_nodes:
4825 all_os[name][nname] = []
4826 # convert params from [name, help] to (name, help)
4827 params = [tuple(v) for v in params]
4828 all_os[name][node_name].append((path, status, diagnose,
4829 variants, params, api_versions))
4832 def _GetQueryData(self, lu):
4833 """Computes the list of nodes and their attributes.
4836 # Locking is not used
4837 assert not (compat.any(lu.glm.is_owned(level)
4838 for level in locking.LEVELS
4839 if level != locking.LEVEL_CLUSTER) or
4840 self.do_locking or self.use_locking)
4842 valid_nodes = [node.name
4843 for node in lu.cfg.GetAllNodesInfo().values()
4844 if not node.offline and node.vm_capable]
4845 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4846 cluster = lu.cfg.GetClusterInfo()
4850 for (os_name, os_data) in pol.items():
4851 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4852 hidden=(os_name in cluster.hidden_os),
4853 blacklisted=(os_name in cluster.blacklisted_os))
4857 api_versions = set()
4859 for idx, osl in enumerate(os_data.values()):
4860 info.valid = bool(info.valid and osl and osl[0][1])
4864 (node_variants, node_params, node_api) = osl[0][3:6]
4867 variants.update(node_variants)
4868 parameters.update(node_params)
4869 api_versions.update(node_api)
4871 # Filter out inconsistent values
4872 variants.intersection_update(node_variants)
4873 parameters.intersection_update(node_params)
4874 api_versions.intersection_update(node_api)
4876 info.variants = list(variants)
4877 info.parameters = list(parameters)
4878 info.api_versions = list(api_versions)
4880 data[os_name] = info
4882 # Prepare data in requested order
4883 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4887 class LUOsDiagnose(NoHooksLU):
4888 """Logical unit for OS diagnose/query.
4894 def _BuildFilter(fields, names):
4895 """Builds a filter for querying OSes.
4898 name_filter = qlang.MakeSimpleFilter("name", names)
4900 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4901 # respective field is not requested
4902 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4903 for fname in ["hidden", "blacklisted"]
4904 if fname not in fields]
4905 if "valid" not in fields:
4906 status_filter.append([qlang.OP_TRUE, "valid"])
4909 status_filter.insert(0, qlang.OP_AND)
4911 status_filter = None
4913 if name_filter and status_filter:
4914 return [qlang.OP_AND, name_filter, status_filter]
4918 return status_filter
4920 def CheckArguments(self):
4921 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4922 self.op.output_fields, False)
4924 def ExpandNames(self):
4925 self.oq.ExpandNames(self)
4927 def Exec(self, feedback_fn):
4928 return self.oq.OldStyleQuery(self)
4931 class LUNodeRemove(LogicalUnit):
4932 """Logical unit for removing a node.
4935 HPATH = "node-remove"
4936 HTYPE = constants.HTYPE_NODE
4938 def BuildHooksEnv(self):
4943 "OP_TARGET": self.op.node_name,
4944 "NODE_NAME": self.op.node_name,
4947 def BuildHooksNodes(self):
4948 """Build hooks nodes.
4950 This doesn't run on the target node in the pre phase as a failed
4951 node would then be impossible to remove.
4954 all_nodes = self.cfg.GetNodeList()
4956 all_nodes.remove(self.op.node_name)
4959 return (all_nodes, all_nodes)
4961 def CheckPrereq(self):
4962 """Check prerequisites.
4965 - the node exists in the configuration
4966 - it does not have primary or secondary instances
4967 - it's not the master
4969 Any errors are signaled by raising errors.OpPrereqError.
4972 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4973 node = self.cfg.GetNodeInfo(self.op.node_name)
4974 assert node is not None
4976 masternode = self.cfg.GetMasterNode()
4977 if node.name == masternode:
4978 raise errors.OpPrereqError("Node is the master node, failover to another"
4979 " node is required", errors.ECODE_INVAL)
4981 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4982 if node.name in instance.all_nodes:
4983 raise errors.OpPrereqError("Instance %s is still running on the node,"
4984 " please remove first" % instance_name,
4986 self.op.node_name = node.name
4989 def Exec(self, feedback_fn):
4990 """Removes the node from the cluster.
4994 logging.info("Stopping the node daemon and removing configs from node %s",
4997 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4999 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5002 # Promote nodes to master candidate as needed
5003 _AdjustCandidatePool(self, exceptions=[node.name])
5004 self.context.RemoveNode(node.name)
5006 # Run post hooks on the node before it's removed
5007 _RunPostHook(self, node.name)
5009 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5010 msg = result.fail_msg
5012 self.LogWarning("Errors encountered on the remote node while leaving"
5013 " the cluster: %s", msg)
5015 # Remove node from our /etc/hosts
5016 if self.cfg.GetClusterInfo().modify_etc_hosts:
5017 master_node = self.cfg.GetMasterNode()
5018 result = self.rpc.call_etc_hosts_modify(master_node,
5019 constants.ETC_HOSTS_REMOVE,
5021 result.Raise("Can't update hosts file with new host data")
5022 _RedistributeAncillaryFiles(self)
5025 class _NodeQuery(_QueryBase):
5026 FIELDS = query.NODE_FIELDS
5028 def ExpandNames(self, lu):
5029 lu.needed_locks = {}
5030 lu.share_locks = _ShareAll()
5033 self.wanted = _GetWantedNodes(lu, self.names)
5035 self.wanted = locking.ALL_SET
5037 self.do_locking = (self.use_locking and
5038 query.NQ_LIVE in self.requested_data)
5041 # If any non-static field is requested we need to lock the nodes
5042 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5044 def DeclareLocks(self, lu, level):
5047 def _GetQueryData(self, lu):
5048 """Computes the list of nodes and their attributes.
5051 all_info = lu.cfg.GetAllNodesInfo()
5053 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5055 # Gather data as requested
5056 if query.NQ_LIVE in self.requested_data:
5057 # filter out non-vm_capable nodes
5058 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5060 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5061 [lu.cfg.GetHypervisorType()])
5062 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5063 for (name, nresult) in node_data.items()
5064 if not nresult.fail_msg and nresult.payload)
5068 if query.NQ_INST in self.requested_data:
5069 node_to_primary = dict([(name, set()) for name in nodenames])
5070 node_to_secondary = dict([(name, set()) for name in nodenames])
5072 inst_data = lu.cfg.GetAllInstancesInfo()
5074 for inst in inst_data.values():
5075 if inst.primary_node in node_to_primary:
5076 node_to_primary[inst.primary_node].add(inst.name)
5077 for secnode in inst.secondary_nodes:
5078 if secnode in node_to_secondary:
5079 node_to_secondary[secnode].add(inst.name)
5081 node_to_primary = None
5082 node_to_secondary = None
5084 if query.NQ_OOB in self.requested_data:
5085 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5086 for name, node in all_info.iteritems())
5090 if query.NQ_GROUP in self.requested_data:
5091 groups = lu.cfg.GetAllNodeGroupsInfo()
5095 return query.NodeQueryData([all_info[name] for name in nodenames],
5096 live_data, lu.cfg.GetMasterNode(),
5097 node_to_primary, node_to_secondary, groups,
5098 oob_support, lu.cfg.GetClusterInfo())
5101 class LUNodeQuery(NoHooksLU):
5102 """Logical unit for querying nodes.
5105 # pylint: disable=W0142
5108 def CheckArguments(self):
5109 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5110 self.op.output_fields, self.op.use_locking)
5112 def ExpandNames(self):
5113 self.nq.ExpandNames(self)
5115 def DeclareLocks(self, level):
5116 self.nq.DeclareLocks(self, level)
5118 def Exec(self, feedback_fn):
5119 return self.nq.OldStyleQuery(self)
5122 class LUNodeQueryvols(NoHooksLU):
5123 """Logical unit for getting volumes on node(s).
5127 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5128 _FIELDS_STATIC = utils.FieldSet("node")
5130 def CheckArguments(self):
5131 _CheckOutputFields(static=self._FIELDS_STATIC,
5132 dynamic=self._FIELDS_DYNAMIC,
5133 selected=self.op.output_fields)
5135 def ExpandNames(self):
5136 self.share_locks = _ShareAll()
5137 self.needed_locks = {}
5139 if not self.op.nodes:
5140 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5142 self.needed_locks[locking.LEVEL_NODE] = \
5143 _GetWantedNodes(self, self.op.nodes)
5145 def Exec(self, feedback_fn):
5146 """Computes the list of nodes and their attributes.
5149 nodenames = self.owned_locks(locking.LEVEL_NODE)
5150 volumes = self.rpc.call_node_volumes(nodenames)
5152 ilist = self.cfg.GetAllInstancesInfo()
5153 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5156 for node in nodenames:
5157 nresult = volumes[node]
5160 msg = nresult.fail_msg
5162 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5165 node_vols = sorted(nresult.payload,
5166 key=operator.itemgetter("dev"))
5168 for vol in node_vols:
5170 for field in self.op.output_fields:
5173 elif field == "phys":
5177 elif field == "name":
5179 elif field == "size":
5180 val = int(float(vol["size"]))
5181 elif field == "instance":
5182 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5184 raise errors.ParameterError(field)
5185 node_output.append(str(val))
5187 output.append(node_output)
5192 class LUNodeQueryStorage(NoHooksLU):
5193 """Logical unit for getting information on storage units on node(s).
5196 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5199 def CheckArguments(self):
5200 _CheckOutputFields(static=self._FIELDS_STATIC,
5201 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5202 selected=self.op.output_fields)
5204 def ExpandNames(self):
5205 self.share_locks = _ShareAll()
5206 self.needed_locks = {}
5209 self.needed_locks[locking.LEVEL_NODE] = \
5210 _GetWantedNodes(self, self.op.nodes)
5212 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5214 def Exec(self, feedback_fn):
5215 """Computes the list of nodes and their attributes.
5218 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5220 # Always get name to sort by
5221 if constants.SF_NAME in self.op.output_fields:
5222 fields = self.op.output_fields[:]
5224 fields = [constants.SF_NAME] + self.op.output_fields
5226 # Never ask for node or type as it's only known to the LU
5227 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5228 while extra in fields:
5229 fields.remove(extra)
5231 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5232 name_idx = field_idx[constants.SF_NAME]
5234 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5235 data = self.rpc.call_storage_list(self.nodes,
5236 self.op.storage_type, st_args,
5237 self.op.name, fields)
5241 for node in utils.NiceSort(self.nodes):
5242 nresult = data[node]
5246 msg = nresult.fail_msg
5248 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5251 rows = dict([(row[name_idx], row) for row in nresult.payload])
5253 for name in utils.NiceSort(rows.keys()):
5258 for field in self.op.output_fields:
5259 if field == constants.SF_NODE:
5261 elif field == constants.SF_TYPE:
5262 val = self.op.storage_type
5263 elif field in field_idx:
5264 val = row[field_idx[field]]
5266 raise errors.ParameterError(field)
5275 class _InstanceQuery(_QueryBase):
5276 FIELDS = query.INSTANCE_FIELDS
5278 def ExpandNames(self, lu):
5279 lu.needed_locks = {}
5280 lu.share_locks = _ShareAll()
5283 self.wanted = _GetWantedInstances(lu, self.names)
5285 self.wanted = locking.ALL_SET
5287 self.do_locking = (self.use_locking and
5288 query.IQ_LIVE in self.requested_data)
5290 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5291 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5292 lu.needed_locks[locking.LEVEL_NODE] = []
5293 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5295 self.do_grouplocks = (self.do_locking and
5296 query.IQ_NODES in self.requested_data)
5298 def DeclareLocks(self, lu, level):
5300 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5301 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5303 # Lock all groups used by instances optimistically; this requires going
5304 # via the node before it's locked, requiring verification later on
5305 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5307 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5308 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5309 elif level == locking.LEVEL_NODE:
5310 lu._LockInstancesNodes() # pylint: disable=W0212
5313 def _CheckGroupLocks(lu):
5314 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5315 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5317 # Check if node groups for locked instances are still correct
5318 for instance_name in owned_instances:
5319 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5321 def _GetQueryData(self, lu):
5322 """Computes the list of instances and their attributes.
5325 if self.do_grouplocks:
5326 self._CheckGroupLocks(lu)
5328 cluster = lu.cfg.GetClusterInfo()
5329 all_info = lu.cfg.GetAllInstancesInfo()
5331 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5333 instance_list = [all_info[name] for name in instance_names]
5334 nodes = frozenset(itertools.chain(*(inst.all_nodes
5335 for inst in instance_list)))
5336 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5339 wrongnode_inst = set()
5341 # Gather data as requested
5342 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5344 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5346 result = node_data[name]
5348 # offline nodes will be in both lists
5349 assert result.fail_msg
5350 offline_nodes.append(name)
5352 bad_nodes.append(name)
5353 elif result.payload:
5354 for inst in result.payload:
5355 if inst in all_info:
5356 if all_info[inst].primary_node == name:
5357 live_data.update(result.payload)
5359 wrongnode_inst.add(inst)
5361 # orphan instance; we don't list it here as we don't
5362 # handle this case yet in the output of instance listing
5363 logging.warning("Orphan instance '%s' found on node %s",
5365 # else no instance is alive
5369 if query.IQ_DISKUSAGE in self.requested_data:
5370 disk_usage = dict((inst.name,
5371 _ComputeDiskSize(inst.disk_template,
5372 [{constants.IDISK_SIZE: disk.size}
5373 for disk in inst.disks]))
5374 for inst in instance_list)
5378 if query.IQ_CONSOLE in self.requested_data:
5380 for inst in instance_list:
5381 if inst.name in live_data:
5382 # Instance is running
5383 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5385 consinfo[inst.name] = None
5386 assert set(consinfo.keys()) == set(instance_names)
5390 if query.IQ_NODES in self.requested_data:
5391 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5393 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5394 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5395 for uuid in set(map(operator.attrgetter("group"),
5401 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5402 disk_usage, offline_nodes, bad_nodes,
5403 live_data, wrongnode_inst, consinfo,
5407 class LUQuery(NoHooksLU):
5408 """Query for resources/items of a certain kind.
5411 # pylint: disable=W0142
5414 def CheckArguments(self):
5415 qcls = _GetQueryImplementation(self.op.what)
5417 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5419 def ExpandNames(self):
5420 self.impl.ExpandNames(self)
5422 def DeclareLocks(self, level):
5423 self.impl.DeclareLocks(self, level)
5425 def Exec(self, feedback_fn):
5426 return self.impl.NewStyleQuery(self)
5429 class LUQueryFields(NoHooksLU):
5430 """Query for resources/items of a certain kind.
5433 # pylint: disable=W0142
5436 def CheckArguments(self):
5437 self.qcls = _GetQueryImplementation(self.op.what)
5439 def ExpandNames(self):
5440 self.needed_locks = {}
5442 def Exec(self, feedback_fn):
5443 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5446 class LUNodeModifyStorage(NoHooksLU):
5447 """Logical unit for modifying a storage volume on a node.
5452 def CheckArguments(self):
5453 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5455 storage_type = self.op.storage_type
5458 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5460 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5461 " modified" % storage_type,
5464 diff = set(self.op.changes.keys()) - modifiable
5466 raise errors.OpPrereqError("The following fields can not be modified for"
5467 " storage units of type '%s': %r" %
5468 (storage_type, list(diff)),
5471 def ExpandNames(self):
5472 self.needed_locks = {
5473 locking.LEVEL_NODE: self.op.node_name,
5476 def Exec(self, feedback_fn):
5477 """Computes the list of nodes and their attributes.
5480 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5481 result = self.rpc.call_storage_modify(self.op.node_name,
5482 self.op.storage_type, st_args,
5483 self.op.name, self.op.changes)
5484 result.Raise("Failed to modify storage unit '%s' on %s" %
5485 (self.op.name, self.op.node_name))
5488 class LUNodeAdd(LogicalUnit):
5489 """Logical unit for adding node to the cluster.
5493 HTYPE = constants.HTYPE_NODE
5494 _NFLAGS = ["master_capable", "vm_capable"]
5496 def CheckArguments(self):
5497 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5498 # validate/normalize the node name
5499 self.hostname = netutils.GetHostname(name=self.op.node_name,
5500 family=self.primary_ip_family)
5501 self.op.node_name = self.hostname.name
5503 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5504 raise errors.OpPrereqError("Cannot readd the master node",
5507 if self.op.readd and self.op.group:
5508 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5509 " being readded", errors.ECODE_INVAL)
5511 def BuildHooksEnv(self):
5514 This will run on all nodes before, and on all nodes + the new node after.
5518 "OP_TARGET": self.op.node_name,
5519 "NODE_NAME": self.op.node_name,
5520 "NODE_PIP": self.op.primary_ip,
5521 "NODE_SIP": self.op.secondary_ip,
5522 "MASTER_CAPABLE": str(self.op.master_capable),
5523 "VM_CAPABLE": str(self.op.vm_capable),
5526 def BuildHooksNodes(self):
5527 """Build hooks nodes.
5530 # Exclude added node
5531 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5532 post_nodes = pre_nodes + [self.op.node_name, ]
5534 return (pre_nodes, post_nodes)
5536 def CheckPrereq(self):
5537 """Check prerequisites.
5540 - the new node is not already in the config
5542 - its parameters (single/dual homed) matches the cluster
5544 Any errors are signaled by raising errors.OpPrereqError.
5548 hostname = self.hostname
5549 node = hostname.name
5550 primary_ip = self.op.primary_ip = hostname.ip
5551 if self.op.secondary_ip is None:
5552 if self.primary_ip_family == netutils.IP6Address.family:
5553 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5554 " IPv4 address must be given as secondary",
5556 self.op.secondary_ip = primary_ip
5558 secondary_ip = self.op.secondary_ip
5559 if not netutils.IP4Address.IsValid(secondary_ip):
5560 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5561 " address" % secondary_ip, errors.ECODE_INVAL)
5563 node_list = cfg.GetNodeList()
5564 if not self.op.readd and node in node_list:
5565 raise errors.OpPrereqError("Node %s is already in the configuration" %
5566 node, errors.ECODE_EXISTS)
5567 elif self.op.readd and node not in node_list:
5568 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5571 self.changed_primary_ip = False
5573 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5574 if self.op.readd and node == existing_node_name:
5575 if existing_node.secondary_ip != secondary_ip:
5576 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5577 " address configuration as before",
5579 if existing_node.primary_ip != primary_ip:
5580 self.changed_primary_ip = True
5584 if (existing_node.primary_ip == primary_ip or
5585 existing_node.secondary_ip == primary_ip or
5586 existing_node.primary_ip == secondary_ip or
5587 existing_node.secondary_ip == secondary_ip):
5588 raise errors.OpPrereqError("New node ip address(es) conflict with"
5589 " existing node %s" % existing_node.name,
5590 errors.ECODE_NOTUNIQUE)
5592 # After this 'if' block, None is no longer a valid value for the
5593 # _capable op attributes
5595 old_node = self.cfg.GetNodeInfo(node)
5596 assert old_node is not None, "Can't retrieve locked node %s" % node
5597 for attr in self._NFLAGS:
5598 if getattr(self.op, attr) is None:
5599 setattr(self.op, attr, getattr(old_node, attr))
5601 for attr in self._NFLAGS:
5602 if getattr(self.op, attr) is None:
5603 setattr(self.op, attr, True)
5605 if self.op.readd and not self.op.vm_capable:
5606 pri, sec = cfg.GetNodeInstances(node)
5608 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5609 " flag set to false, but it already holds"
5610 " instances" % node,
5613 # check that the type of the node (single versus dual homed) is the
5614 # same as for the master
5615 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5616 master_singlehomed = myself.secondary_ip == myself.primary_ip
5617 newbie_singlehomed = secondary_ip == primary_ip
5618 if master_singlehomed != newbie_singlehomed:
5619 if master_singlehomed:
5620 raise errors.OpPrereqError("The master has no secondary ip but the"
5621 " new node has one",
5624 raise errors.OpPrereqError("The master has a secondary ip but the"
5625 " new node doesn't have one",
5628 # checks reachability
5629 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5630 raise errors.OpPrereqError("Node not reachable by ping",
5631 errors.ECODE_ENVIRON)
5633 if not newbie_singlehomed:
5634 # check reachability from my secondary ip to newbie's secondary ip
5635 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5636 source=myself.secondary_ip):
5637 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5638 " based ping to node daemon port",
5639 errors.ECODE_ENVIRON)
5646 if self.op.master_capable:
5647 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5649 self.master_candidate = False
5652 self.new_node = old_node
5654 node_group = cfg.LookupNodeGroup(self.op.group)
5655 self.new_node = objects.Node(name=node,
5656 primary_ip=primary_ip,
5657 secondary_ip=secondary_ip,
5658 master_candidate=self.master_candidate,
5659 offline=False, drained=False,
5662 if self.op.ndparams:
5663 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5665 if self.op.hv_state:
5666 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5668 if self.op.disk_state:
5669 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5671 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5672 # it a property on the base class.
5673 result = rpc.DnsOnlyRunner().call_version([node])[node]
5674 result.Raise("Can't get version information from node %s" % node)
5675 if constants.PROTOCOL_VERSION == result.payload:
5676 logging.info("Communication to node %s fine, sw version %s match",
5677 node, result.payload)
5679 raise errors.OpPrereqError("Version mismatch master version %s,"
5680 " node version %s" %
5681 (constants.PROTOCOL_VERSION, result.payload),
5682 errors.ECODE_ENVIRON)
5684 def Exec(self, feedback_fn):
5685 """Adds the new node to the cluster.
5688 new_node = self.new_node
5689 node = new_node.name
5691 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5694 # We adding a new node so we assume it's powered
5695 new_node.powered = True
5697 # for re-adds, reset the offline/drained/master-candidate flags;
5698 # we need to reset here, otherwise offline would prevent RPC calls
5699 # later in the procedure; this also means that if the re-add
5700 # fails, we are left with a non-offlined, broken node
5702 new_node.drained = new_node.offline = False # pylint: disable=W0201
5703 self.LogInfo("Readding a node, the offline/drained flags were reset")
5704 # if we demote the node, we do cleanup later in the procedure
5705 new_node.master_candidate = self.master_candidate
5706 if self.changed_primary_ip:
5707 new_node.primary_ip = self.op.primary_ip
5709 # copy the master/vm_capable flags
5710 for attr in self._NFLAGS:
5711 setattr(new_node, attr, getattr(self.op, attr))
5713 # notify the user about any possible mc promotion
5714 if new_node.master_candidate:
5715 self.LogInfo("Node will be a master candidate")
5717 if self.op.ndparams:
5718 new_node.ndparams = self.op.ndparams
5720 new_node.ndparams = {}
5722 if self.op.hv_state:
5723 new_node.hv_state_static = self.new_hv_state
5725 if self.op.disk_state:
5726 new_node.disk_state_static = self.new_disk_state
5728 # Add node to our /etc/hosts, and add key to known_hosts
5729 if self.cfg.GetClusterInfo().modify_etc_hosts:
5730 master_node = self.cfg.GetMasterNode()
5731 result = self.rpc.call_etc_hosts_modify(master_node,
5732 constants.ETC_HOSTS_ADD,
5735 result.Raise("Can't update hosts file with new host data")
5737 if new_node.secondary_ip != new_node.primary_ip:
5738 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5741 node_verify_list = [self.cfg.GetMasterNode()]
5742 node_verify_param = {
5743 constants.NV_NODELIST: ([node], {}),
5744 # TODO: do a node-net-test as well?
5747 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5748 self.cfg.GetClusterName())
5749 for verifier in node_verify_list:
5750 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5751 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5753 for failed in nl_payload:
5754 feedback_fn("ssh/hostname verification failed"
5755 " (checking from %s): %s" %
5756 (verifier, nl_payload[failed]))
5757 raise errors.OpExecError("ssh/hostname verification failed")
5760 _RedistributeAncillaryFiles(self)
5761 self.context.ReaddNode(new_node)
5762 # make sure we redistribute the config
5763 self.cfg.Update(new_node, feedback_fn)
5764 # and make sure the new node will not have old files around
5765 if not new_node.master_candidate:
5766 result = self.rpc.call_node_demote_from_mc(new_node.name)
5767 msg = result.fail_msg
5769 self.LogWarning("Node failed to demote itself from master"
5770 " candidate status: %s" % msg)
5772 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5773 additional_vm=self.op.vm_capable)
5774 self.context.AddNode(new_node, self.proc.GetECId())
5777 class LUNodeSetParams(LogicalUnit):
5778 """Modifies the parameters of a node.
5780 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5781 to the node role (as _ROLE_*)
5782 @cvar _R2F: a dictionary from node role to tuples of flags
5783 @cvar _FLAGS: a list of attribute names corresponding to the flags
5786 HPATH = "node-modify"
5787 HTYPE = constants.HTYPE_NODE
5789 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5791 (True, False, False): _ROLE_CANDIDATE,
5792 (False, True, False): _ROLE_DRAINED,
5793 (False, False, True): _ROLE_OFFLINE,
5794 (False, False, False): _ROLE_REGULAR,
5796 _R2F = dict((v, k) for k, v in _F2R.items())
5797 _FLAGS = ["master_candidate", "drained", "offline"]
5799 def CheckArguments(self):
5800 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5801 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5802 self.op.master_capable, self.op.vm_capable,
5803 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5805 if all_mods.count(None) == len(all_mods):
5806 raise errors.OpPrereqError("Please pass at least one modification",
5808 if all_mods.count(True) > 1:
5809 raise errors.OpPrereqError("Can't set the node into more than one"
5810 " state at the same time",
5813 # Boolean value that tells us whether we might be demoting from MC
5814 self.might_demote = (self.op.master_candidate == False or
5815 self.op.offline == True or
5816 self.op.drained == True or
5817 self.op.master_capable == False)
5819 if self.op.secondary_ip:
5820 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5821 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5822 " address" % self.op.secondary_ip,
5825 self.lock_all = self.op.auto_promote and self.might_demote
5826 self.lock_instances = self.op.secondary_ip is not None
5828 def _InstanceFilter(self, instance):
5829 """Filter for getting affected instances.
5832 return (instance.disk_template in constants.DTS_INT_MIRROR and
5833 self.op.node_name in instance.all_nodes)
5835 def ExpandNames(self):
5837 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5839 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5841 # Since modifying a node can have severe effects on currently running
5842 # operations the resource lock is at least acquired in shared mode
5843 self.needed_locks[locking.LEVEL_NODE_RES] = \
5844 self.needed_locks[locking.LEVEL_NODE]
5846 # Get node resource and instance locks in shared mode; they are not used
5847 # for anything but read-only access
5848 self.share_locks[locking.LEVEL_NODE_RES] = 1
5849 self.share_locks[locking.LEVEL_INSTANCE] = 1
5851 if self.lock_instances:
5852 self.needed_locks[locking.LEVEL_INSTANCE] = \
5853 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5855 def BuildHooksEnv(self):
5858 This runs on the master node.
5862 "OP_TARGET": self.op.node_name,
5863 "MASTER_CANDIDATE": str(self.op.master_candidate),
5864 "OFFLINE": str(self.op.offline),
5865 "DRAINED": str(self.op.drained),
5866 "MASTER_CAPABLE": str(self.op.master_capable),
5867 "VM_CAPABLE": str(self.op.vm_capable),
5870 def BuildHooksNodes(self):
5871 """Build hooks nodes.
5874 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5877 def CheckPrereq(self):
5878 """Check prerequisites.
5880 This only checks the instance list against the existing names.
5883 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5885 if self.lock_instances:
5886 affected_instances = \
5887 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5889 # Verify instance locks
5890 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5891 wanted_instances = frozenset(affected_instances.keys())
5892 if wanted_instances - owned_instances:
5893 raise errors.OpPrereqError("Instances affected by changing node %s's"
5894 " secondary IP address have changed since"
5895 " locks were acquired, wanted '%s', have"
5896 " '%s'; retry the operation" %
5898 utils.CommaJoin(wanted_instances),
5899 utils.CommaJoin(owned_instances)),
5902 affected_instances = None
5904 if (self.op.master_candidate is not None or
5905 self.op.drained is not None or
5906 self.op.offline is not None):
5907 # we can't change the master's node flags
5908 if self.op.node_name == self.cfg.GetMasterNode():
5909 raise errors.OpPrereqError("The master role can be changed"
5910 " only via master-failover",
5913 if self.op.master_candidate and not node.master_capable:
5914 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5915 " it a master candidate" % node.name,
5918 if self.op.vm_capable == False:
5919 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5921 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5922 " the vm_capable flag" % node.name,
5925 if node.master_candidate and self.might_demote and not self.lock_all:
5926 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5927 # check if after removing the current node, we're missing master
5929 (mc_remaining, mc_should, _) = \
5930 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5931 if mc_remaining < mc_should:
5932 raise errors.OpPrereqError("Not enough master candidates, please"
5933 " pass auto promote option to allow"
5934 " promotion (--auto-promote or RAPI"
5935 " auto_promote=True)", errors.ECODE_STATE)
5937 self.old_flags = old_flags = (node.master_candidate,
5938 node.drained, node.offline)
5939 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5940 self.old_role = old_role = self._F2R[old_flags]
5942 # Check for ineffective changes
5943 for attr in self._FLAGS:
5944 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5945 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5946 setattr(self.op, attr, None)
5948 # Past this point, any flag change to False means a transition
5949 # away from the respective state, as only real changes are kept
5951 # TODO: We might query the real power state if it supports OOB
5952 if _SupportsOob(self.cfg, node):
5953 if self.op.offline is False and not (node.powered or
5954 self.op.powered == True):
5955 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5956 " offline status can be reset") %
5958 elif self.op.powered is not None:
5959 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5960 " as it does not support out-of-band"
5961 " handling") % self.op.node_name)
5963 # If we're being deofflined/drained, we'll MC ourself if needed
5964 if (self.op.drained == False or self.op.offline == False or
5965 (self.op.master_capable and not node.master_capable)):
5966 if _DecideSelfPromotion(self):
5967 self.op.master_candidate = True
5968 self.LogInfo("Auto-promoting node to master candidate")
5970 # If we're no longer master capable, we'll demote ourselves from MC
5971 if self.op.master_capable == False and node.master_candidate:
5972 self.LogInfo("Demoting from master candidate")
5973 self.op.master_candidate = False
5976 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5977 if self.op.master_candidate:
5978 new_role = self._ROLE_CANDIDATE
5979 elif self.op.drained:
5980 new_role = self._ROLE_DRAINED
5981 elif self.op.offline:
5982 new_role = self._ROLE_OFFLINE
5983 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5984 # False is still in new flags, which means we're un-setting (the
5986 new_role = self._ROLE_REGULAR
5987 else: # no new flags, nothing, keep old role
5990 self.new_role = new_role
5992 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5993 # Trying to transition out of offline status
5994 result = self.rpc.call_version([node.name])[node.name]
5996 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5997 " to report its version: %s" %
5998 (node.name, result.fail_msg),
6001 self.LogWarning("Transitioning node from offline to online state"
6002 " without using re-add. Please make sure the node"
6005 # When changing the secondary ip, verify if this is a single-homed to
6006 # multi-homed transition or vice versa, and apply the relevant
6008 if self.op.secondary_ip:
6009 # Ok even without locking, because this can't be changed by any LU
6010 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6011 master_singlehomed = master.secondary_ip == master.primary_ip
6012 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6013 if self.op.force and node.name == master.name:
6014 self.LogWarning("Transitioning from single-homed to multi-homed"
6015 " cluster. All nodes will require a secondary ip.")
6017 raise errors.OpPrereqError("Changing the secondary ip on a"
6018 " single-homed cluster requires the"
6019 " --force option to be passed, and the"
6020 " target node to be the master",
6022 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6023 if self.op.force and node.name == master.name:
6024 self.LogWarning("Transitioning from multi-homed to single-homed"
6025 " cluster. Secondary IPs will have to be removed.")
6027 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6028 " same as the primary IP on a multi-homed"
6029 " cluster, unless the --force option is"
6030 " passed, and the target node is the"
6031 " master", errors.ECODE_INVAL)
6033 assert not (frozenset(affected_instances) -
6034 self.owned_locks(locking.LEVEL_INSTANCE))
6037 if affected_instances:
6038 raise errors.OpPrereqError("Cannot change secondary IP address:"
6039 " offline node has instances (%s)"
6040 " configured to use it" %
6041 utils.CommaJoin(affected_instances.keys()))
6043 # On online nodes, check that no instances are running, and that
6044 # the node has the new ip and we can reach it.
6045 for instance in affected_instances.values():
6046 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6047 msg="cannot change secondary ip")
6049 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6050 if master.name != node.name:
6051 # check reachability from master secondary ip to new secondary ip
6052 if not netutils.TcpPing(self.op.secondary_ip,
6053 constants.DEFAULT_NODED_PORT,
6054 source=master.secondary_ip):
6055 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6056 " based ping to node daemon port",
6057 errors.ECODE_ENVIRON)
6059 if self.op.ndparams:
6060 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6061 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6062 self.new_ndparams = new_ndparams
6064 if self.op.hv_state:
6065 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6066 self.node.hv_state_static)
6068 if self.op.disk_state:
6069 self.new_disk_state = \
6070 _MergeAndVerifyDiskState(self.op.disk_state,
6071 self.node.disk_state_static)
6073 def Exec(self, feedback_fn):
6078 old_role = self.old_role
6079 new_role = self.new_role
6083 if self.op.ndparams:
6084 node.ndparams = self.new_ndparams
6086 if self.op.powered is not None:
6087 node.powered = self.op.powered
6089 if self.op.hv_state:
6090 node.hv_state_static = self.new_hv_state
6092 if self.op.disk_state:
6093 node.disk_state_static = self.new_disk_state
6095 for attr in ["master_capable", "vm_capable"]:
6096 val = getattr(self.op, attr)
6098 setattr(node, attr, val)
6099 result.append((attr, str(val)))
6101 if new_role != old_role:
6102 # Tell the node to demote itself, if no longer MC and not offline
6103 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6104 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6106 self.LogWarning("Node failed to demote itself: %s", msg)
6108 new_flags = self._R2F[new_role]
6109 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6111 result.append((desc, str(nf)))
6112 (node.master_candidate, node.drained, node.offline) = new_flags
6114 # we locked all nodes, we adjust the CP before updating this node
6116 _AdjustCandidatePool(self, [node.name])
6118 if self.op.secondary_ip:
6119 node.secondary_ip = self.op.secondary_ip
6120 result.append(("secondary_ip", self.op.secondary_ip))
6122 # this will trigger configuration file update, if needed
6123 self.cfg.Update(node, feedback_fn)
6125 # this will trigger job queue propagation or cleanup if the mc
6127 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6128 self.context.ReaddNode(node)
6133 class LUNodePowercycle(NoHooksLU):
6134 """Powercycles a node.
6139 def CheckArguments(self):
6140 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6141 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6142 raise errors.OpPrereqError("The node is the master and the force"
6143 " parameter was not set",
6146 def ExpandNames(self):
6147 """Locking for PowercycleNode.
6149 This is a last-resort option and shouldn't block on other
6150 jobs. Therefore, we grab no locks.
6153 self.needed_locks = {}
6155 def Exec(self, feedback_fn):
6159 result = self.rpc.call_node_powercycle(self.op.node_name,
6160 self.cfg.GetHypervisorType())
6161 result.Raise("Failed to schedule the reboot")
6162 return result.payload
6165 class LUClusterQuery(NoHooksLU):
6166 """Query cluster configuration.
6171 def ExpandNames(self):
6172 self.needed_locks = {}
6174 def Exec(self, feedback_fn):
6175 """Return cluster config.
6178 cluster = self.cfg.GetClusterInfo()
6181 # Filter just for enabled hypervisors
6182 for os_name, hv_dict in cluster.os_hvp.items():
6183 os_hvp[os_name] = {}
6184 for hv_name, hv_params in hv_dict.items():
6185 if hv_name in cluster.enabled_hypervisors:
6186 os_hvp[os_name][hv_name] = hv_params
6188 # Convert ip_family to ip_version
6189 primary_ip_version = constants.IP4_VERSION
6190 if cluster.primary_ip_family == netutils.IP6Address.family:
6191 primary_ip_version = constants.IP6_VERSION
6194 "software_version": constants.RELEASE_VERSION,
6195 "protocol_version": constants.PROTOCOL_VERSION,
6196 "config_version": constants.CONFIG_VERSION,
6197 "os_api_version": max(constants.OS_API_VERSIONS),
6198 "export_version": constants.EXPORT_VERSION,
6199 "architecture": runtime.GetArchInfo(),
6200 "name": cluster.cluster_name,
6201 "master": cluster.master_node,
6202 "default_hypervisor": cluster.primary_hypervisor,
6203 "enabled_hypervisors": cluster.enabled_hypervisors,
6204 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6205 for hypervisor_name in cluster.enabled_hypervisors]),
6207 "beparams": cluster.beparams,
6208 "osparams": cluster.osparams,
6209 "ipolicy": cluster.ipolicy,
6210 "nicparams": cluster.nicparams,
6211 "ndparams": cluster.ndparams,
6212 "diskparams": cluster.diskparams,
6213 "candidate_pool_size": cluster.candidate_pool_size,
6214 "master_netdev": cluster.master_netdev,
6215 "master_netmask": cluster.master_netmask,
6216 "use_external_mip_script": cluster.use_external_mip_script,
6217 "volume_group_name": cluster.volume_group_name,
6218 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6219 "file_storage_dir": cluster.file_storage_dir,
6220 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6221 "maintain_node_health": cluster.maintain_node_health,
6222 "ctime": cluster.ctime,
6223 "mtime": cluster.mtime,
6224 "uuid": cluster.uuid,
6225 "tags": list(cluster.GetTags()),
6226 "uid_pool": cluster.uid_pool,
6227 "default_iallocator": cluster.default_iallocator,
6228 "reserved_lvs": cluster.reserved_lvs,
6229 "primary_ip_version": primary_ip_version,
6230 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6231 "hidden_os": cluster.hidden_os,
6232 "blacklisted_os": cluster.blacklisted_os,
6238 class LUClusterConfigQuery(NoHooksLU):
6239 """Return configuration values.
6244 def CheckArguments(self):
6245 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6247 def ExpandNames(self):
6248 self.cq.ExpandNames(self)
6250 def DeclareLocks(self, level):
6251 self.cq.DeclareLocks(self, level)
6253 def Exec(self, feedback_fn):
6254 result = self.cq.OldStyleQuery(self)
6256 assert len(result) == 1
6261 class _ClusterQuery(_QueryBase):
6262 FIELDS = query.CLUSTER_FIELDS
6264 #: Do not sort (there is only one item)
6267 def ExpandNames(self, lu):
6268 lu.needed_locks = {}
6270 # The following variables interact with _QueryBase._GetNames
6271 self.wanted = locking.ALL_SET
6272 self.do_locking = self.use_locking
6275 raise errors.OpPrereqError("Can not use locking for cluster queries",
6278 def DeclareLocks(self, lu, level):
6281 def _GetQueryData(self, lu):
6282 """Computes the list of nodes and their attributes.
6285 # Locking is not used
6286 assert not (compat.any(lu.glm.is_owned(level)
6287 for level in locking.LEVELS
6288 if level != locking.LEVEL_CLUSTER) or
6289 self.do_locking or self.use_locking)
6291 if query.CQ_CONFIG in self.requested_data:
6292 cluster = lu.cfg.GetClusterInfo()
6294 cluster = NotImplemented
6296 if query.CQ_QUEUE_DRAINED in self.requested_data:
6297 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6299 drain_flag = NotImplemented
6301 if query.CQ_WATCHER_PAUSE in self.requested_data:
6302 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6304 watcher_pause = NotImplemented
6306 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6309 class LUInstanceActivateDisks(NoHooksLU):
6310 """Bring up an instance's disks.
6315 def ExpandNames(self):
6316 self._ExpandAndLockInstance()
6317 self.needed_locks[locking.LEVEL_NODE] = []
6318 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6320 def DeclareLocks(self, level):
6321 if level == locking.LEVEL_NODE:
6322 self._LockInstancesNodes()
6324 def CheckPrereq(self):
6325 """Check prerequisites.
6327 This checks that the instance is in the cluster.
6330 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6331 assert self.instance is not None, \
6332 "Cannot retrieve locked instance %s" % self.op.instance_name
6333 _CheckNodeOnline(self, self.instance.primary_node)
6335 def Exec(self, feedback_fn):
6336 """Activate the disks.
6339 disks_ok, disks_info = \
6340 _AssembleInstanceDisks(self, self.instance,
6341 ignore_size=self.op.ignore_size)
6343 raise errors.OpExecError("Cannot activate block devices")
6345 if self.op.wait_for_sync:
6346 if not _WaitForSync(self, self.instance):
6347 raise errors.OpExecError("Some disks of the instance are degraded!")
6352 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6354 """Prepare the block devices for an instance.
6356 This sets up the block devices on all nodes.
6358 @type lu: L{LogicalUnit}
6359 @param lu: the logical unit on whose behalf we execute
6360 @type instance: L{objects.Instance}
6361 @param instance: the instance for whose disks we assemble
6362 @type disks: list of L{objects.Disk} or None
6363 @param disks: which disks to assemble (or all, if None)
6364 @type ignore_secondaries: boolean
6365 @param ignore_secondaries: if true, errors on secondary nodes
6366 won't result in an error return from the function
6367 @type ignore_size: boolean
6368 @param ignore_size: if true, the current known size of the disk
6369 will not be used during the disk activation, useful for cases
6370 when the size is wrong
6371 @return: False if the operation failed, otherwise a list of
6372 (host, instance_visible_name, node_visible_name)
6373 with the mapping from node devices to instance devices
6378 iname = instance.name
6379 disks = _ExpandCheckDisks(instance, disks)
6381 # With the two passes mechanism we try to reduce the window of
6382 # opportunity for the race condition of switching DRBD to primary
6383 # before handshaking occured, but we do not eliminate it
6385 # The proper fix would be to wait (with some limits) until the
6386 # connection has been made and drbd transitions from WFConnection
6387 # into any other network-connected state (Connected, SyncTarget,
6390 # 1st pass, assemble on all nodes in secondary mode
6391 for idx, inst_disk in enumerate(disks):
6392 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6394 node_disk = node_disk.Copy()
6395 node_disk.UnsetSize()
6396 lu.cfg.SetDiskID(node_disk, node)
6397 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6399 msg = result.fail_msg
6401 is_offline_secondary = (node in instance.secondary_nodes and
6403 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6404 " (is_primary=False, pass=1): %s",
6405 inst_disk.iv_name, node, msg)
6406 if not (ignore_secondaries or is_offline_secondary):
6409 # FIXME: race condition on drbd migration to primary
6411 # 2nd pass, do only the primary node
6412 for idx, inst_disk in enumerate(disks):
6415 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6416 if node != instance.primary_node:
6419 node_disk = node_disk.Copy()
6420 node_disk.UnsetSize()
6421 lu.cfg.SetDiskID(node_disk, node)
6422 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6424 msg = result.fail_msg
6426 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6427 " (is_primary=True, pass=2): %s",
6428 inst_disk.iv_name, node, msg)
6431 dev_path = result.payload
6433 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6435 # leave the disks configured for the primary node
6436 # this is a workaround that would be fixed better by
6437 # improving the logical/physical id handling
6439 lu.cfg.SetDiskID(disk, instance.primary_node)
6441 return disks_ok, device_info
6444 def _StartInstanceDisks(lu, instance, force):
6445 """Start the disks of an instance.
6448 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6449 ignore_secondaries=force)
6451 _ShutdownInstanceDisks(lu, instance)
6452 if force is not None and not force:
6453 lu.proc.LogWarning("", hint="If the message above refers to a"
6455 " you can retry the operation using '--force'.")
6456 raise errors.OpExecError("Disk consistency error")
6459 class LUInstanceDeactivateDisks(NoHooksLU):
6460 """Shutdown an instance's disks.
6465 def ExpandNames(self):
6466 self._ExpandAndLockInstance()
6467 self.needed_locks[locking.LEVEL_NODE] = []
6468 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6470 def DeclareLocks(self, level):
6471 if level == locking.LEVEL_NODE:
6472 self._LockInstancesNodes()
6474 def CheckPrereq(self):
6475 """Check prerequisites.
6477 This checks that the instance is in the cluster.
6480 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6481 assert self.instance is not None, \
6482 "Cannot retrieve locked instance %s" % self.op.instance_name
6484 def Exec(self, feedback_fn):
6485 """Deactivate the disks
6488 instance = self.instance
6490 _ShutdownInstanceDisks(self, instance)
6492 _SafeShutdownInstanceDisks(self, instance)
6495 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6496 """Shutdown block devices of an instance.
6498 This function checks if an instance is running, before calling
6499 _ShutdownInstanceDisks.
6502 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6503 _ShutdownInstanceDisks(lu, instance, disks=disks)
6506 def _ExpandCheckDisks(instance, disks):
6507 """Return the instance disks selected by the disks list
6509 @type disks: list of L{objects.Disk} or None
6510 @param disks: selected disks
6511 @rtype: list of L{objects.Disk}
6512 @return: selected instance disks to act on
6516 return instance.disks
6518 if not set(disks).issubset(instance.disks):
6519 raise errors.ProgrammerError("Can only act on disks belonging to the"
6524 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6525 """Shutdown block devices of an instance.
6527 This does the shutdown on all nodes of the instance.
6529 If the ignore_primary is false, errors on the primary node are
6534 disks = _ExpandCheckDisks(instance, disks)
6537 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6538 lu.cfg.SetDiskID(top_disk, node)
6539 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6540 msg = result.fail_msg
6542 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6543 disk.iv_name, node, msg)
6544 if ((node == instance.primary_node and not ignore_primary) or
6545 (node != instance.primary_node and not result.offline)):
6550 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6551 """Checks if a node has enough free memory.
6553 This function check if a given node has the needed amount of free
6554 memory. In case the node has less memory or we cannot get the
6555 information from the node, this function raise an OpPrereqError
6558 @type lu: C{LogicalUnit}
6559 @param lu: a logical unit from which we get configuration data
6561 @param node: the node to check
6562 @type reason: C{str}
6563 @param reason: string to use in the error message
6564 @type requested: C{int}
6565 @param requested: the amount of memory in MiB to check for
6566 @type hypervisor_name: C{str}
6567 @param hypervisor_name: the hypervisor to ask for memory stats
6569 @return: node current free memory
6570 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6571 we cannot check the node
6574 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6575 nodeinfo[node].Raise("Can't get data from node %s" % node,
6576 prereq=True, ecode=errors.ECODE_ENVIRON)
6577 (_, _, (hv_info, )) = nodeinfo[node].payload
6579 free_mem = hv_info.get("memory_free", None)
6580 if not isinstance(free_mem, int):
6581 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6582 " was '%s'" % (node, free_mem),
6583 errors.ECODE_ENVIRON)
6584 if requested > free_mem:
6585 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6586 " needed %s MiB, available %s MiB" %
6587 (node, reason, requested, free_mem),
6592 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6593 """Checks if nodes have enough free disk space in the all VGs.
6595 This function check if all given nodes have the needed amount of
6596 free disk. In case any node has less disk or we cannot get the
6597 information from the node, this function raise an OpPrereqError
6600 @type lu: C{LogicalUnit}
6601 @param lu: a logical unit from which we get configuration data
6602 @type nodenames: C{list}
6603 @param nodenames: the list of node names to check
6604 @type req_sizes: C{dict}
6605 @param req_sizes: the hash of vg and corresponding amount of disk in
6607 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6608 or we cannot check the node
6611 for vg, req_size in req_sizes.items():
6612 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6615 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6616 """Checks if nodes have enough free disk space in the specified VG.
6618 This function check if all given nodes have the needed amount of
6619 free disk. In case any node has less disk or we cannot get the
6620 information from the node, this function raise an OpPrereqError
6623 @type lu: C{LogicalUnit}
6624 @param lu: a logical unit from which we get configuration data
6625 @type nodenames: C{list}
6626 @param nodenames: the list of node names to check
6628 @param vg: the volume group to check
6629 @type requested: C{int}
6630 @param requested: the amount of disk in MiB to check for
6631 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6632 or we cannot check the node
6635 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6636 for node in nodenames:
6637 info = nodeinfo[node]
6638 info.Raise("Cannot get current information from node %s" % node,
6639 prereq=True, ecode=errors.ECODE_ENVIRON)
6640 (_, (vg_info, ), _) = info.payload
6641 vg_free = vg_info.get("vg_free", None)
6642 if not isinstance(vg_free, int):
6643 raise errors.OpPrereqError("Can't compute free disk space on node"
6644 " %s for vg %s, result was '%s'" %
6645 (node, vg, vg_free), errors.ECODE_ENVIRON)
6646 if requested > vg_free:
6647 raise errors.OpPrereqError("Not enough disk space on target node %s"
6648 " vg %s: required %d MiB, available %d MiB" %
6649 (node, vg, requested, vg_free),
6653 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6654 """Checks if nodes have enough physical CPUs
6656 This function checks if all given nodes have the needed number of
6657 physical CPUs. In case any node has less CPUs or we cannot get the
6658 information from the node, this function raises an OpPrereqError
6661 @type lu: C{LogicalUnit}
6662 @param lu: a logical unit from which we get configuration data
6663 @type nodenames: C{list}
6664 @param nodenames: the list of node names to check
6665 @type requested: C{int}
6666 @param requested: the minimum acceptable number of physical CPUs
6667 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6668 or we cannot check the node
6671 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6672 for node in nodenames:
6673 info = nodeinfo[node]
6674 info.Raise("Cannot get current information from node %s" % node,
6675 prereq=True, ecode=errors.ECODE_ENVIRON)
6676 (_, _, (hv_info, )) = info.payload
6677 num_cpus = hv_info.get("cpu_total", None)
6678 if not isinstance(num_cpus, int):
6679 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6680 " on node %s, result was '%s'" %
6681 (node, num_cpus), errors.ECODE_ENVIRON)
6682 if requested > num_cpus:
6683 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6684 "required" % (node, num_cpus, requested),
6688 class LUInstanceStartup(LogicalUnit):
6689 """Starts an instance.
6692 HPATH = "instance-start"
6693 HTYPE = constants.HTYPE_INSTANCE
6696 def CheckArguments(self):
6698 if self.op.beparams:
6699 # fill the beparams dict
6700 objects.UpgradeBeParams(self.op.beparams)
6701 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6703 def ExpandNames(self):
6704 self._ExpandAndLockInstance()
6705 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6707 def DeclareLocks(self, level):
6708 if level == locking.LEVEL_NODE_RES:
6709 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6711 def BuildHooksEnv(self):
6714 This runs on master, primary and secondary nodes of the instance.
6718 "FORCE": self.op.force,
6721 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6725 def BuildHooksNodes(self):
6726 """Build hooks nodes.
6729 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6732 def CheckPrereq(self):
6733 """Check prerequisites.
6735 This checks that the instance is in the cluster.
6738 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6739 assert self.instance is not None, \
6740 "Cannot retrieve locked instance %s" % self.op.instance_name
6743 if self.op.hvparams:
6744 # check hypervisor parameter syntax (locally)
6745 cluster = self.cfg.GetClusterInfo()
6746 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6747 filled_hvp = cluster.FillHV(instance)
6748 filled_hvp.update(self.op.hvparams)
6749 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6750 hv_type.CheckParameterSyntax(filled_hvp)
6751 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6753 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6755 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6757 if self.primary_offline and self.op.ignore_offline_nodes:
6758 self.proc.LogWarning("Ignoring offline primary node")
6760 if self.op.hvparams or self.op.beparams:
6761 self.proc.LogWarning("Overridden parameters are ignored")
6763 _CheckNodeOnline(self, instance.primary_node)
6765 bep = self.cfg.GetClusterInfo().FillBE(instance)
6766 bep.update(self.op.beparams)
6768 # check bridges existence
6769 _CheckInstanceBridgesExist(self, instance)
6771 remote_info = self.rpc.call_instance_info(instance.primary_node,
6773 instance.hypervisor)
6774 remote_info.Raise("Error checking node %s" % instance.primary_node,
6775 prereq=True, ecode=errors.ECODE_ENVIRON)
6776 if not remote_info.payload: # not running already
6777 _CheckNodeFreeMemory(self, instance.primary_node,
6778 "starting instance %s" % instance.name,
6779 bep[constants.BE_MINMEM], instance.hypervisor)
6781 def Exec(self, feedback_fn):
6782 """Start the instance.
6785 instance = self.instance
6786 force = self.op.force
6788 if not self.op.no_remember:
6789 self.cfg.MarkInstanceUp(instance.name)
6791 if self.primary_offline:
6792 assert self.op.ignore_offline_nodes
6793 self.proc.LogInfo("Primary node offline, marked instance as started")
6795 node_current = instance.primary_node
6797 _StartInstanceDisks(self, instance, force)
6800 self.rpc.call_instance_start(node_current,
6801 (instance, self.op.hvparams,
6803 self.op.startup_paused)
6804 msg = result.fail_msg
6806 _ShutdownInstanceDisks(self, instance)
6807 raise errors.OpExecError("Could not start instance: %s" % msg)
6810 class LUInstanceReboot(LogicalUnit):
6811 """Reboot an instance.
6814 HPATH = "instance-reboot"
6815 HTYPE = constants.HTYPE_INSTANCE
6818 def ExpandNames(self):
6819 self._ExpandAndLockInstance()
6821 def BuildHooksEnv(self):
6824 This runs on master, primary and secondary nodes of the instance.
6828 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6829 "REBOOT_TYPE": self.op.reboot_type,
6830 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6833 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6837 def BuildHooksNodes(self):
6838 """Build hooks nodes.
6841 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6844 def CheckPrereq(self):
6845 """Check prerequisites.
6847 This checks that the instance is in the cluster.
6850 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6851 assert self.instance is not None, \
6852 "Cannot retrieve locked instance %s" % self.op.instance_name
6853 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6854 _CheckNodeOnline(self, instance.primary_node)
6856 # check bridges existence
6857 _CheckInstanceBridgesExist(self, instance)
6859 def Exec(self, feedback_fn):
6860 """Reboot the instance.
6863 instance = self.instance
6864 ignore_secondaries = self.op.ignore_secondaries
6865 reboot_type = self.op.reboot_type
6867 remote_info = self.rpc.call_instance_info(instance.primary_node,
6869 instance.hypervisor)
6870 remote_info.Raise("Error checking node %s" % instance.primary_node)
6871 instance_running = bool(remote_info.payload)
6873 node_current = instance.primary_node
6875 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6876 constants.INSTANCE_REBOOT_HARD]:
6877 for disk in instance.disks:
6878 self.cfg.SetDiskID(disk, node_current)
6879 result = self.rpc.call_instance_reboot(node_current, instance,
6881 self.op.shutdown_timeout)
6882 result.Raise("Could not reboot instance")
6884 if instance_running:
6885 result = self.rpc.call_instance_shutdown(node_current, instance,
6886 self.op.shutdown_timeout)
6887 result.Raise("Could not shutdown instance for full reboot")
6888 _ShutdownInstanceDisks(self, instance)
6890 self.LogInfo("Instance %s was already stopped, starting now",
6892 _StartInstanceDisks(self, instance, ignore_secondaries)
6893 result = self.rpc.call_instance_start(node_current,
6894 (instance, None, None), False)
6895 msg = result.fail_msg
6897 _ShutdownInstanceDisks(self, instance)
6898 raise errors.OpExecError("Could not start instance for"
6899 " full reboot: %s" % msg)
6901 self.cfg.MarkInstanceUp(instance.name)
6904 class LUInstanceShutdown(LogicalUnit):
6905 """Shutdown an instance.
6908 HPATH = "instance-stop"
6909 HTYPE = constants.HTYPE_INSTANCE
6912 def ExpandNames(self):
6913 self._ExpandAndLockInstance()
6915 def BuildHooksEnv(self):
6918 This runs on master, primary and secondary nodes of the instance.
6921 env = _BuildInstanceHookEnvByObject(self, self.instance)
6922 env["TIMEOUT"] = self.op.timeout
6925 def BuildHooksNodes(self):
6926 """Build hooks nodes.
6929 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6932 def CheckPrereq(self):
6933 """Check prerequisites.
6935 This checks that the instance is in the cluster.
6938 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6939 assert self.instance is not None, \
6940 "Cannot retrieve locked instance %s" % self.op.instance_name
6942 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6944 self.primary_offline = \
6945 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6947 if self.primary_offline and self.op.ignore_offline_nodes:
6948 self.proc.LogWarning("Ignoring offline primary node")
6950 _CheckNodeOnline(self, self.instance.primary_node)
6952 def Exec(self, feedback_fn):
6953 """Shutdown the instance.
6956 instance = self.instance
6957 node_current = instance.primary_node
6958 timeout = self.op.timeout
6960 if not self.op.no_remember:
6961 self.cfg.MarkInstanceDown(instance.name)
6963 if self.primary_offline:
6964 assert self.op.ignore_offline_nodes
6965 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6967 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6968 msg = result.fail_msg
6970 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6972 _ShutdownInstanceDisks(self, instance)
6975 class LUInstanceReinstall(LogicalUnit):
6976 """Reinstall an instance.
6979 HPATH = "instance-reinstall"
6980 HTYPE = constants.HTYPE_INSTANCE
6983 def ExpandNames(self):
6984 self._ExpandAndLockInstance()
6986 def BuildHooksEnv(self):
6989 This runs on master, primary and secondary nodes of the instance.
6992 return _BuildInstanceHookEnvByObject(self, self.instance)
6994 def BuildHooksNodes(self):
6995 """Build hooks nodes.
6998 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7001 def CheckPrereq(self):
7002 """Check prerequisites.
7004 This checks that the instance is in the cluster and is not running.
7007 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7008 assert instance is not None, \
7009 "Cannot retrieve locked instance %s" % self.op.instance_name
7010 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7011 " offline, cannot reinstall")
7013 if instance.disk_template == constants.DT_DISKLESS:
7014 raise errors.OpPrereqError("Instance '%s' has no disks" %
7015 self.op.instance_name,
7017 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7019 if self.op.os_type is not None:
7021 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7022 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7023 instance_os = self.op.os_type
7025 instance_os = instance.os
7027 nodelist = list(instance.all_nodes)
7029 if self.op.osparams:
7030 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7031 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7032 self.os_inst = i_osdict # the new dict (without defaults)
7036 self.instance = instance
7038 def Exec(self, feedback_fn):
7039 """Reinstall the instance.
7042 inst = self.instance
7044 if self.op.os_type is not None:
7045 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7046 inst.os = self.op.os_type
7047 # Write to configuration
7048 self.cfg.Update(inst, feedback_fn)
7050 _StartInstanceDisks(self, inst, None)
7052 feedback_fn("Running the instance OS create scripts...")
7053 # FIXME: pass debug option from opcode to backend
7054 result = self.rpc.call_instance_os_add(inst.primary_node,
7055 (inst, self.os_inst), True,
7056 self.op.debug_level)
7057 result.Raise("Could not install OS for instance %s on node %s" %
7058 (inst.name, inst.primary_node))
7060 _ShutdownInstanceDisks(self, inst)
7063 class LUInstanceRecreateDisks(LogicalUnit):
7064 """Recreate an instance's missing disks.
7067 HPATH = "instance-recreate-disks"
7068 HTYPE = constants.HTYPE_INSTANCE
7071 _MODIFYABLE = frozenset([
7072 constants.IDISK_SIZE,
7073 constants.IDISK_MODE,
7076 # New or changed disk parameters may have different semantics
7077 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7078 constants.IDISK_ADOPT,
7080 # TODO: Implement support changing VG while recreating
7082 constants.IDISK_METAVG,
7085 def CheckArguments(self):
7086 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7087 # Normalize and convert deprecated list of disk indices
7088 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7090 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7092 raise errors.OpPrereqError("Some disks have been specified more than"
7093 " once: %s" % utils.CommaJoin(duplicates),
7096 for (idx, params) in self.op.disks:
7097 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7098 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7100 raise errors.OpPrereqError("Parameters for disk %s try to change"
7101 " unmodifyable parameter(s): %s" %
7102 (idx, utils.CommaJoin(unsupported)),
7105 def ExpandNames(self):
7106 self._ExpandAndLockInstance()
7107 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7109 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7110 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7112 self.needed_locks[locking.LEVEL_NODE] = []
7113 self.needed_locks[locking.LEVEL_NODE_RES] = []
7115 def DeclareLocks(self, level):
7116 if level == locking.LEVEL_NODE:
7117 # if we replace the nodes, we only need to lock the old primary,
7118 # otherwise we need to lock all nodes for disk re-creation
7119 primary_only = bool(self.op.nodes)
7120 self._LockInstancesNodes(primary_only=primary_only)
7121 elif level == locking.LEVEL_NODE_RES:
7123 self.needed_locks[locking.LEVEL_NODE_RES] = \
7124 self.needed_locks[locking.LEVEL_NODE][:]
7126 def BuildHooksEnv(self):
7129 This runs on master, primary and secondary nodes of the instance.
7132 return _BuildInstanceHookEnvByObject(self, self.instance)
7134 def BuildHooksNodes(self):
7135 """Build hooks nodes.
7138 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7141 def CheckPrereq(self):
7142 """Check prerequisites.
7144 This checks that the instance is in the cluster and is not running.
7147 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7148 assert instance is not None, \
7149 "Cannot retrieve locked instance %s" % self.op.instance_name
7151 if len(self.op.nodes) != len(instance.all_nodes):
7152 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7153 " %d replacement nodes were specified" %
7154 (instance.name, len(instance.all_nodes),
7155 len(self.op.nodes)),
7157 assert instance.disk_template != constants.DT_DRBD8 or \
7158 len(self.op.nodes) == 2
7159 assert instance.disk_template != constants.DT_PLAIN or \
7160 len(self.op.nodes) == 1
7161 primary_node = self.op.nodes[0]
7163 primary_node = instance.primary_node
7164 _CheckNodeOnline(self, primary_node)
7166 if instance.disk_template == constants.DT_DISKLESS:
7167 raise errors.OpPrereqError("Instance '%s' has no disks" %
7168 self.op.instance_name, errors.ECODE_INVAL)
7170 # if we replace nodes *and* the old primary is offline, we don't
7172 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7173 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7174 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7175 if not (self.op.nodes and old_pnode.offline):
7176 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7177 msg="cannot recreate disks")
7180 self.disks = dict(self.op.disks)
7182 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7184 maxidx = max(self.disks.keys())
7185 if maxidx >= len(instance.disks):
7186 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7189 if (self.op.nodes and
7190 sorted(self.disks.keys()) != range(len(instance.disks))):
7191 raise errors.OpPrereqError("Can't recreate disks partially and"
7192 " change the nodes at the same time",
7195 self.instance = instance
7197 def Exec(self, feedback_fn):
7198 """Recreate the disks.
7201 instance = self.instance
7203 assert (self.owned_locks(locking.LEVEL_NODE) ==
7204 self.owned_locks(locking.LEVEL_NODE_RES))
7207 mods = [] # keeps track of needed changes
7209 for idx, disk in enumerate(instance.disks):
7211 changes = self.disks[idx]
7213 # Disk should not be recreated
7217 # update secondaries for disks, if needed
7218 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7219 # need to update the nodes and minors
7220 assert len(self.op.nodes) == 2
7221 assert len(disk.logical_id) == 6 # otherwise disk internals
7223 (_, _, old_port, _, _, old_secret) = disk.logical_id
7224 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7225 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7226 new_minors[0], new_minors[1], old_secret)
7227 assert len(disk.logical_id) == len(new_id)
7231 mods.append((idx, new_id, changes))
7233 # now that we have passed all asserts above, we can apply the mods
7234 # in a single run (to avoid partial changes)
7235 for idx, new_id, changes in mods:
7236 disk = instance.disks[idx]
7237 if new_id is not None:
7238 assert disk.dev_type == constants.LD_DRBD8
7239 disk.logical_id = new_id
7241 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7242 mode=changes.get(constants.IDISK_MODE, None))
7244 # change primary node, if needed
7246 instance.primary_node = self.op.nodes[0]
7247 self.LogWarning("Changing the instance's nodes, you will have to"
7248 " remove any disks left on the older nodes manually")
7251 self.cfg.Update(instance, feedback_fn)
7253 _CreateDisks(self, instance, to_skip=to_skip)
7256 class LUInstanceRename(LogicalUnit):
7257 """Rename an instance.
7260 HPATH = "instance-rename"
7261 HTYPE = constants.HTYPE_INSTANCE
7263 def CheckArguments(self):
7267 if self.op.ip_check and not self.op.name_check:
7268 # TODO: make the ip check more flexible and not depend on the name check
7269 raise errors.OpPrereqError("IP address check requires a name check",
7272 def BuildHooksEnv(self):
7275 This runs on master, primary and secondary nodes of the instance.
7278 env = _BuildInstanceHookEnvByObject(self, self.instance)
7279 env["INSTANCE_NEW_NAME"] = self.op.new_name
7282 def BuildHooksNodes(self):
7283 """Build hooks nodes.
7286 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7289 def CheckPrereq(self):
7290 """Check prerequisites.
7292 This checks that the instance is in the cluster and is not running.
7295 self.op.instance_name = _ExpandInstanceName(self.cfg,
7296 self.op.instance_name)
7297 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7298 assert instance is not None
7299 _CheckNodeOnline(self, instance.primary_node)
7300 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7301 msg="cannot rename")
7302 self.instance = instance
7304 new_name = self.op.new_name
7305 if self.op.name_check:
7306 hostname = netutils.GetHostname(name=new_name)
7307 if hostname.name != new_name:
7308 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7310 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7311 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7312 " same as given hostname '%s'") %
7313 (hostname.name, self.op.new_name),
7315 new_name = self.op.new_name = hostname.name
7316 if (self.op.ip_check and
7317 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7318 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7319 (hostname.ip, new_name),
7320 errors.ECODE_NOTUNIQUE)
7322 instance_list = self.cfg.GetInstanceList()
7323 if new_name in instance_list and new_name != instance.name:
7324 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7325 new_name, errors.ECODE_EXISTS)
7327 def Exec(self, feedback_fn):
7328 """Rename the instance.
7331 inst = self.instance
7332 old_name = inst.name
7334 rename_file_storage = False
7335 if (inst.disk_template in constants.DTS_FILEBASED and
7336 self.op.new_name != inst.name):
7337 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7338 rename_file_storage = True
7340 self.cfg.RenameInstance(inst.name, self.op.new_name)
7341 # Change the instance lock. This is definitely safe while we hold the BGL.
7342 # Otherwise the new lock would have to be added in acquired mode.
7344 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7345 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7347 # re-read the instance from the configuration after rename
7348 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7350 if rename_file_storage:
7351 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7352 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7353 old_file_storage_dir,
7354 new_file_storage_dir)
7355 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7356 " (but the instance has been renamed in Ganeti)" %
7357 (inst.primary_node, old_file_storage_dir,
7358 new_file_storage_dir))
7360 _StartInstanceDisks(self, inst, None)
7362 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7363 old_name, self.op.debug_level)
7364 msg = result.fail_msg
7366 msg = ("Could not run OS rename script for instance %s on node %s"
7367 " (but the instance has been renamed in Ganeti): %s" %
7368 (inst.name, inst.primary_node, msg))
7369 self.proc.LogWarning(msg)
7371 _ShutdownInstanceDisks(self, inst)
7376 class LUInstanceRemove(LogicalUnit):
7377 """Remove an instance.
7380 HPATH = "instance-remove"
7381 HTYPE = constants.HTYPE_INSTANCE
7384 def ExpandNames(self):
7385 self._ExpandAndLockInstance()
7386 self.needed_locks[locking.LEVEL_NODE] = []
7387 self.needed_locks[locking.LEVEL_NODE_RES] = []
7388 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7390 def DeclareLocks(self, level):
7391 if level == locking.LEVEL_NODE:
7392 self._LockInstancesNodes()
7393 elif level == locking.LEVEL_NODE_RES:
7395 self.needed_locks[locking.LEVEL_NODE_RES] = \
7396 self.needed_locks[locking.LEVEL_NODE][:]
7398 def BuildHooksEnv(self):
7401 This runs on master, primary and secondary nodes of the instance.
7404 env = _BuildInstanceHookEnvByObject(self, self.instance)
7405 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7408 def BuildHooksNodes(self):
7409 """Build hooks nodes.
7412 nl = [self.cfg.GetMasterNode()]
7413 nl_post = list(self.instance.all_nodes) + nl
7414 return (nl, nl_post)
7416 def CheckPrereq(self):
7417 """Check prerequisites.
7419 This checks that the instance is in the cluster.
7422 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7423 assert self.instance is not None, \
7424 "Cannot retrieve locked instance %s" % self.op.instance_name
7426 def Exec(self, feedback_fn):
7427 """Remove the instance.
7430 instance = self.instance
7431 logging.info("Shutting down instance %s on node %s",
7432 instance.name, instance.primary_node)
7434 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7435 self.op.shutdown_timeout)
7436 msg = result.fail_msg
7438 if self.op.ignore_failures:
7439 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7441 raise errors.OpExecError("Could not shutdown instance %s on"
7443 (instance.name, instance.primary_node, msg))
7445 assert (self.owned_locks(locking.LEVEL_NODE) ==
7446 self.owned_locks(locking.LEVEL_NODE_RES))
7447 assert not (set(instance.all_nodes) -
7448 self.owned_locks(locking.LEVEL_NODE)), \
7449 "Not owning correct locks"
7451 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7454 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7455 """Utility function to remove an instance.
7458 logging.info("Removing block devices for instance %s", instance.name)
7460 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7461 if not ignore_failures:
7462 raise errors.OpExecError("Can't remove instance's disks")
7463 feedback_fn("Warning: can't remove instance's disks")
7465 logging.info("Removing instance %s out of cluster config", instance.name)
7467 lu.cfg.RemoveInstance(instance.name)
7469 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7470 "Instance lock removal conflict"
7472 # Remove lock for the instance
7473 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7476 class LUInstanceQuery(NoHooksLU):
7477 """Logical unit for querying instances.
7480 # pylint: disable=W0142
7483 def CheckArguments(self):
7484 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7485 self.op.output_fields, self.op.use_locking)
7487 def ExpandNames(self):
7488 self.iq.ExpandNames(self)
7490 def DeclareLocks(self, level):
7491 self.iq.DeclareLocks(self, level)
7493 def Exec(self, feedback_fn):
7494 return self.iq.OldStyleQuery(self)
7497 class LUInstanceFailover(LogicalUnit):
7498 """Failover an instance.
7501 HPATH = "instance-failover"
7502 HTYPE = constants.HTYPE_INSTANCE
7505 def CheckArguments(self):
7506 """Check the arguments.
7509 self.iallocator = getattr(self.op, "iallocator", None)
7510 self.target_node = getattr(self.op, "target_node", None)
7512 def ExpandNames(self):
7513 self._ExpandAndLockInstance()
7515 if self.op.target_node is not None:
7516 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7518 self.needed_locks[locking.LEVEL_NODE] = []
7519 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7521 self.needed_locks[locking.LEVEL_NODE_RES] = []
7522 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7524 ignore_consistency = self.op.ignore_consistency
7525 shutdown_timeout = self.op.shutdown_timeout
7526 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7529 ignore_consistency=ignore_consistency,
7530 shutdown_timeout=shutdown_timeout,
7531 ignore_ipolicy=self.op.ignore_ipolicy)
7532 self.tasklets = [self._migrater]
7534 def DeclareLocks(self, level):
7535 if level == locking.LEVEL_NODE:
7536 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7537 if instance.disk_template in constants.DTS_EXT_MIRROR:
7538 if self.op.target_node is None:
7539 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7541 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7542 self.op.target_node]
7543 del self.recalculate_locks[locking.LEVEL_NODE]
7545 self._LockInstancesNodes()
7546 elif level == locking.LEVEL_NODE_RES:
7548 self.needed_locks[locking.LEVEL_NODE_RES] = \
7549 self.needed_locks[locking.LEVEL_NODE][:]
7551 def BuildHooksEnv(self):
7554 This runs on master, primary and secondary nodes of the instance.
7557 instance = self._migrater.instance
7558 source_node = instance.primary_node
7559 target_node = self.op.target_node
7561 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7562 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7563 "OLD_PRIMARY": source_node,
7564 "NEW_PRIMARY": target_node,
7567 if instance.disk_template in constants.DTS_INT_MIRROR:
7568 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7569 env["NEW_SECONDARY"] = source_node
7571 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7573 env.update(_BuildInstanceHookEnvByObject(self, instance))
7577 def BuildHooksNodes(self):
7578 """Build hooks nodes.
7581 instance = self._migrater.instance
7582 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7583 return (nl, nl + [instance.primary_node])
7586 class LUInstanceMigrate(LogicalUnit):
7587 """Migrate an instance.
7589 This is migration without shutting down, compared to the failover,
7590 which is done with shutdown.
7593 HPATH = "instance-migrate"
7594 HTYPE = constants.HTYPE_INSTANCE
7597 def ExpandNames(self):
7598 self._ExpandAndLockInstance()
7600 if self.op.target_node is not None:
7601 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7603 self.needed_locks[locking.LEVEL_NODE] = []
7604 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7606 self.needed_locks[locking.LEVEL_NODE] = []
7607 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7610 TLMigrateInstance(self, self.op.instance_name,
7611 cleanup=self.op.cleanup,
7613 fallback=self.op.allow_failover,
7614 allow_runtime_changes=self.op.allow_runtime_changes,
7615 ignore_ipolicy=self.op.ignore_ipolicy)
7616 self.tasklets = [self._migrater]
7618 def DeclareLocks(self, level):
7619 if level == locking.LEVEL_NODE:
7620 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7621 if instance.disk_template in constants.DTS_EXT_MIRROR:
7622 if self.op.target_node is None:
7623 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7625 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7626 self.op.target_node]
7627 del self.recalculate_locks[locking.LEVEL_NODE]
7629 self._LockInstancesNodes()
7630 elif level == locking.LEVEL_NODE_RES:
7632 self.needed_locks[locking.LEVEL_NODE_RES] = \
7633 self.needed_locks[locking.LEVEL_NODE][:]
7635 def BuildHooksEnv(self):
7638 This runs on master, primary and secondary nodes of the instance.
7641 instance = self._migrater.instance
7642 source_node = instance.primary_node
7643 target_node = self.op.target_node
7644 env = _BuildInstanceHookEnvByObject(self, instance)
7646 "MIGRATE_LIVE": self._migrater.live,
7647 "MIGRATE_CLEANUP": self.op.cleanup,
7648 "OLD_PRIMARY": source_node,
7649 "NEW_PRIMARY": target_node,
7650 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7653 if instance.disk_template in constants.DTS_INT_MIRROR:
7654 env["OLD_SECONDARY"] = target_node
7655 env["NEW_SECONDARY"] = source_node
7657 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7661 def BuildHooksNodes(self):
7662 """Build hooks nodes.
7665 instance = self._migrater.instance
7666 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7667 return (nl, nl + [instance.primary_node])
7670 class LUInstanceMove(LogicalUnit):
7671 """Move an instance by data-copying.
7674 HPATH = "instance-move"
7675 HTYPE = constants.HTYPE_INSTANCE
7678 def ExpandNames(self):
7679 self._ExpandAndLockInstance()
7680 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7681 self.op.target_node = target_node
7682 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7683 self.needed_locks[locking.LEVEL_NODE_RES] = []
7684 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7686 def DeclareLocks(self, level):
7687 if level == locking.LEVEL_NODE:
7688 self._LockInstancesNodes(primary_only=True)
7689 elif level == locking.LEVEL_NODE_RES:
7691 self.needed_locks[locking.LEVEL_NODE_RES] = \
7692 self.needed_locks[locking.LEVEL_NODE][:]
7694 def BuildHooksEnv(self):
7697 This runs on master, primary and secondary nodes of the instance.
7701 "TARGET_NODE": self.op.target_node,
7702 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7704 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7707 def BuildHooksNodes(self):
7708 """Build hooks nodes.
7712 self.cfg.GetMasterNode(),
7713 self.instance.primary_node,
7714 self.op.target_node,
7718 def CheckPrereq(self):
7719 """Check prerequisites.
7721 This checks that the instance is in the cluster.
7724 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7725 assert self.instance is not None, \
7726 "Cannot retrieve locked instance %s" % self.op.instance_name
7728 node = self.cfg.GetNodeInfo(self.op.target_node)
7729 assert node is not None, \
7730 "Cannot retrieve locked node %s" % self.op.target_node
7732 self.target_node = target_node = node.name
7734 if target_node == instance.primary_node:
7735 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7736 (instance.name, target_node),
7739 bep = self.cfg.GetClusterInfo().FillBE(instance)
7741 for idx, dsk in enumerate(instance.disks):
7742 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7743 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7744 " cannot copy" % idx, errors.ECODE_STATE)
7746 _CheckNodeOnline(self, target_node)
7747 _CheckNodeNotDrained(self, target_node)
7748 _CheckNodeVmCapable(self, target_node)
7749 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7750 self.cfg.GetNodeGroup(node.group))
7751 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7752 ignore=self.op.ignore_ipolicy)
7754 if instance.admin_state == constants.ADMINST_UP:
7755 # check memory requirements on the secondary node
7756 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7757 instance.name, bep[constants.BE_MAXMEM],
7758 instance.hypervisor)
7760 self.LogInfo("Not checking memory on the secondary node as"
7761 " instance will not be started")
7763 # check bridge existance
7764 _CheckInstanceBridgesExist(self, instance, node=target_node)
7766 def Exec(self, feedback_fn):
7767 """Move an instance.
7769 The move is done by shutting it down on its present node, copying
7770 the data over (slow) and starting it on the new node.
7773 instance = self.instance
7775 source_node = instance.primary_node
7776 target_node = self.target_node
7778 self.LogInfo("Shutting down instance %s on source node %s",
7779 instance.name, source_node)
7781 assert (self.owned_locks(locking.LEVEL_NODE) ==
7782 self.owned_locks(locking.LEVEL_NODE_RES))
7784 result = self.rpc.call_instance_shutdown(source_node, instance,
7785 self.op.shutdown_timeout)
7786 msg = result.fail_msg
7788 if self.op.ignore_consistency:
7789 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7790 " Proceeding anyway. Please make sure node"
7791 " %s is down. Error details: %s",
7792 instance.name, source_node, source_node, msg)
7794 raise errors.OpExecError("Could not shutdown instance %s on"
7796 (instance.name, source_node, msg))
7798 # create the target disks
7800 _CreateDisks(self, instance, target_node=target_node)
7801 except errors.OpExecError:
7802 self.LogWarning("Device creation failed, reverting...")
7804 _RemoveDisks(self, instance, target_node=target_node)
7806 self.cfg.ReleaseDRBDMinors(instance.name)
7809 cluster_name = self.cfg.GetClusterInfo().cluster_name
7812 # activate, get path, copy the data over
7813 for idx, disk in enumerate(instance.disks):
7814 self.LogInfo("Copying data for disk %d", idx)
7815 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7816 instance.name, True, idx)
7818 self.LogWarning("Can't assemble newly created disk %d: %s",
7819 idx, result.fail_msg)
7820 errs.append(result.fail_msg)
7822 dev_path = result.payload
7823 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7824 target_node, dev_path,
7827 self.LogWarning("Can't copy data over for disk %d: %s",
7828 idx, result.fail_msg)
7829 errs.append(result.fail_msg)
7833 self.LogWarning("Some disks failed to copy, aborting")
7835 _RemoveDisks(self, instance, target_node=target_node)
7837 self.cfg.ReleaseDRBDMinors(instance.name)
7838 raise errors.OpExecError("Errors during disk copy: %s" %
7841 instance.primary_node = target_node
7842 self.cfg.Update(instance, feedback_fn)
7844 self.LogInfo("Removing the disks on the original node")
7845 _RemoveDisks(self, instance, target_node=source_node)
7847 # Only start the instance if it's marked as up
7848 if instance.admin_state == constants.ADMINST_UP:
7849 self.LogInfo("Starting instance %s on node %s",
7850 instance.name, target_node)
7852 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7853 ignore_secondaries=True)
7855 _ShutdownInstanceDisks(self, instance)
7856 raise errors.OpExecError("Can't activate the instance's disks")
7858 result = self.rpc.call_instance_start(target_node,
7859 (instance, None, None), False)
7860 msg = result.fail_msg
7862 _ShutdownInstanceDisks(self, instance)
7863 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7864 (instance.name, target_node, msg))
7867 class LUNodeMigrate(LogicalUnit):
7868 """Migrate all instances from a node.
7871 HPATH = "node-migrate"
7872 HTYPE = constants.HTYPE_NODE
7875 def CheckArguments(self):
7878 def ExpandNames(self):
7879 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7881 self.share_locks = _ShareAll()
7882 self.needed_locks = {
7883 locking.LEVEL_NODE: [self.op.node_name],
7886 def BuildHooksEnv(self):
7889 This runs on the master, the primary and all the secondaries.
7893 "NODE_NAME": self.op.node_name,
7894 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7897 def BuildHooksNodes(self):
7898 """Build hooks nodes.
7901 nl = [self.cfg.GetMasterNode()]
7904 def CheckPrereq(self):
7907 def Exec(self, feedback_fn):
7908 # Prepare jobs for migration instances
7909 allow_runtime_changes = self.op.allow_runtime_changes
7911 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7914 iallocator=self.op.iallocator,
7915 target_node=self.op.target_node,
7916 allow_runtime_changes=allow_runtime_changes,
7917 ignore_ipolicy=self.op.ignore_ipolicy)]
7918 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7921 # TODO: Run iallocator in this opcode and pass correct placement options to
7922 # OpInstanceMigrate. Since other jobs can modify the cluster between
7923 # running the iallocator and the actual migration, a good consistency model
7924 # will have to be found.
7926 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7927 frozenset([self.op.node_name]))
7929 return ResultWithJobs(jobs)
7932 class TLMigrateInstance(Tasklet):
7933 """Tasklet class for instance migration.
7936 @ivar live: whether the migration will be done live or non-live;
7937 this variable is initalized only after CheckPrereq has run
7938 @type cleanup: boolean
7939 @ivar cleanup: Wheater we cleanup from a failed migration
7940 @type iallocator: string
7941 @ivar iallocator: The iallocator used to determine target_node
7942 @type target_node: string
7943 @ivar target_node: If given, the target_node to reallocate the instance to
7944 @type failover: boolean
7945 @ivar failover: Whether operation results in failover or migration
7946 @type fallback: boolean
7947 @ivar fallback: Whether fallback to failover is allowed if migration not
7949 @type ignore_consistency: boolean
7950 @ivar ignore_consistency: Wheter we should ignore consistency between source
7952 @type shutdown_timeout: int
7953 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7954 @type ignore_ipolicy: bool
7955 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7960 _MIGRATION_POLL_INTERVAL = 1 # seconds
7961 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7963 def __init__(self, lu, instance_name, cleanup=False,
7964 failover=False, fallback=False,
7965 ignore_consistency=False,
7966 allow_runtime_changes=True,
7967 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7968 ignore_ipolicy=False):
7969 """Initializes this class.
7972 Tasklet.__init__(self, lu)
7975 self.instance_name = instance_name
7976 self.cleanup = cleanup
7977 self.live = False # will be overridden later
7978 self.failover = failover
7979 self.fallback = fallback
7980 self.ignore_consistency = ignore_consistency
7981 self.shutdown_timeout = shutdown_timeout
7982 self.ignore_ipolicy = ignore_ipolicy
7983 self.allow_runtime_changes = allow_runtime_changes
7985 def CheckPrereq(self):
7986 """Check prerequisites.
7988 This checks that the instance is in the cluster.
7991 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7992 instance = self.cfg.GetInstanceInfo(instance_name)
7993 assert instance is not None
7994 self.instance = instance
7995 cluster = self.cfg.GetClusterInfo()
7997 if (not self.cleanup and
7998 not instance.admin_state == constants.ADMINST_UP and
7999 not self.failover and self.fallback):
8000 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8001 " switching to failover")
8002 self.failover = True
8004 if instance.disk_template not in constants.DTS_MIRRORED:
8009 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8010 " %s" % (instance.disk_template, text),
8013 if instance.disk_template in constants.DTS_EXT_MIRROR:
8014 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8016 if self.lu.op.iallocator:
8017 self._RunAllocator()
8019 # We set set self.target_node as it is required by
8021 self.target_node = self.lu.op.target_node
8023 # Check that the target node is correct in terms of instance policy
8024 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8025 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8026 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8027 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8028 ignore=self.ignore_ipolicy)
8030 # self.target_node is already populated, either directly or by the
8032 target_node = self.target_node
8033 if self.target_node == instance.primary_node:
8034 raise errors.OpPrereqError("Cannot migrate instance %s"
8035 " to its primary (%s)" %
8036 (instance.name, instance.primary_node))
8038 if len(self.lu.tasklets) == 1:
8039 # It is safe to release locks only when we're the only tasklet
8041 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8042 keep=[instance.primary_node, self.target_node])
8045 secondary_nodes = instance.secondary_nodes
8046 if not secondary_nodes:
8047 raise errors.ConfigurationError("No secondary node but using"
8048 " %s disk template" %
8049 instance.disk_template)
8050 target_node = secondary_nodes[0]
8051 if self.lu.op.iallocator or (self.lu.op.target_node and
8052 self.lu.op.target_node != target_node):
8054 text = "failed over"
8057 raise errors.OpPrereqError("Instances with disk template %s cannot"
8058 " be %s to arbitrary nodes"
8059 " (neither an iallocator nor a target"
8060 " node can be passed)" %
8061 (instance.disk_template, text),
8063 nodeinfo = self.cfg.GetNodeInfo(target_node)
8064 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8065 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8066 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8067 ignore=self.ignore_ipolicy)
8069 i_be = cluster.FillBE(instance)
8071 # check memory requirements on the secondary node
8072 if (not self.cleanup and
8073 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8074 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8075 "migrating instance %s" %
8077 i_be[constants.BE_MINMEM],
8078 instance.hypervisor)
8080 self.lu.LogInfo("Not checking memory on the secondary node as"
8081 " instance will not be started")
8083 # check if failover must be forced instead of migration
8084 if (not self.cleanup and not self.failover and
8085 i_be[constants.BE_ALWAYS_FAILOVER]):
8087 self.lu.LogInfo("Instance configured to always failover; fallback"
8089 self.failover = True
8091 raise errors.OpPrereqError("This instance has been configured to"
8092 " always failover, please allow failover",
8095 # check bridge existance
8096 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8098 if not self.cleanup:
8099 _CheckNodeNotDrained(self.lu, target_node)
8100 if not self.failover:
8101 result = self.rpc.call_instance_migratable(instance.primary_node,
8103 if result.fail_msg and self.fallback:
8104 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8106 self.failover = True
8108 result.Raise("Can't migrate, please use failover",
8109 prereq=True, ecode=errors.ECODE_STATE)
8111 assert not (self.failover and self.cleanup)
8113 if not self.failover:
8114 if self.lu.op.live is not None and self.lu.op.mode is not None:
8115 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8116 " parameters are accepted",
8118 if self.lu.op.live is not None:
8120 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8122 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8123 # reset the 'live' parameter to None so that repeated
8124 # invocations of CheckPrereq do not raise an exception
8125 self.lu.op.live = None
8126 elif self.lu.op.mode is None:
8127 # read the default value from the hypervisor
8128 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8129 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8131 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8133 # Failover is never live
8136 if not (self.failover or self.cleanup):
8137 remote_info = self.rpc.call_instance_info(instance.primary_node,
8139 instance.hypervisor)
8140 remote_info.Raise("Error checking instance on node %s" %
8141 instance.primary_node)
8142 instance_running = bool(remote_info.payload)
8143 if instance_running:
8144 self.current_mem = int(remote_info.payload["memory"])
8146 def _RunAllocator(self):
8147 """Run the allocator based on input opcode.
8150 # FIXME: add a self.ignore_ipolicy option
8151 ial = IAllocator(self.cfg, self.rpc,
8152 mode=constants.IALLOCATOR_MODE_RELOC,
8153 name=self.instance_name,
8154 relocate_from=[self.instance.primary_node],
8157 ial.Run(self.lu.op.iallocator)
8160 raise errors.OpPrereqError("Can't compute nodes using"
8161 " iallocator '%s': %s" %
8162 (self.lu.op.iallocator, ial.info),
8164 if len(ial.result) != ial.required_nodes:
8165 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8166 " of nodes (%s), required %s" %
8167 (self.lu.op.iallocator, len(ial.result),
8168 ial.required_nodes), errors.ECODE_FAULT)
8169 self.target_node = ial.result[0]
8170 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8171 self.instance_name, self.lu.op.iallocator,
8172 utils.CommaJoin(ial.result))
8174 def _WaitUntilSync(self):
8175 """Poll with custom rpc for disk sync.
8177 This uses our own step-based rpc call.
8180 self.feedback_fn("* wait until resync is done")
8184 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8186 (self.instance.disks,
8189 for node, nres in result.items():
8190 nres.Raise("Cannot resync disks on node %s" % node)
8191 node_done, node_percent = nres.payload
8192 all_done = all_done and node_done
8193 if node_percent is not None:
8194 min_percent = min(min_percent, node_percent)
8196 if min_percent < 100:
8197 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8200 def _EnsureSecondary(self, node):
8201 """Demote a node to secondary.
8204 self.feedback_fn("* switching node %s to secondary mode" % node)
8206 for dev in self.instance.disks:
8207 self.cfg.SetDiskID(dev, node)
8209 result = self.rpc.call_blockdev_close(node, self.instance.name,
8210 self.instance.disks)
8211 result.Raise("Cannot change disk to secondary on node %s" % node)
8213 def _GoStandalone(self):
8214 """Disconnect from the network.
8217 self.feedback_fn("* changing into standalone mode")
8218 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8219 self.instance.disks)
8220 for node, nres in result.items():
8221 nres.Raise("Cannot disconnect disks node %s" % node)
8223 def _GoReconnect(self, multimaster):
8224 """Reconnect to the network.
8230 msg = "single-master"
8231 self.feedback_fn("* changing disks into %s mode" % msg)
8232 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8233 (self.instance.disks, self.instance),
8234 self.instance.name, multimaster)
8235 for node, nres in result.items():
8236 nres.Raise("Cannot change disks config on node %s" % node)
8238 def _ExecCleanup(self):
8239 """Try to cleanup after a failed migration.
8241 The cleanup is done by:
8242 - check that the instance is running only on one node
8243 (and update the config if needed)
8244 - change disks on its secondary node to secondary
8245 - wait until disks are fully synchronized
8246 - disconnect from the network
8247 - change disks into single-master mode
8248 - wait again until disks are fully synchronized
8251 instance = self.instance
8252 target_node = self.target_node
8253 source_node = self.source_node
8255 # check running on only one node
8256 self.feedback_fn("* checking where the instance actually runs"
8257 " (if this hangs, the hypervisor might be in"
8259 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8260 for node, result in ins_l.items():
8261 result.Raise("Can't contact node %s" % node)
8263 runningon_source = instance.name in ins_l[source_node].payload
8264 runningon_target = instance.name in ins_l[target_node].payload
8266 if runningon_source and runningon_target:
8267 raise errors.OpExecError("Instance seems to be running on two nodes,"
8268 " or the hypervisor is confused; you will have"
8269 " to ensure manually that it runs only on one"
8270 " and restart this operation")
8272 if not (runningon_source or runningon_target):
8273 raise errors.OpExecError("Instance does not seem to be running at all;"
8274 " in this case it's safer to repair by"
8275 " running 'gnt-instance stop' to ensure disk"
8276 " shutdown, and then restarting it")
8278 if runningon_target:
8279 # the migration has actually succeeded, we need to update the config
8280 self.feedback_fn("* instance running on secondary node (%s),"
8281 " updating config" % target_node)
8282 instance.primary_node = target_node
8283 self.cfg.Update(instance, self.feedback_fn)
8284 demoted_node = source_node
8286 self.feedback_fn("* instance confirmed to be running on its"
8287 " primary node (%s)" % source_node)
8288 demoted_node = target_node
8290 if instance.disk_template in constants.DTS_INT_MIRROR:
8291 self._EnsureSecondary(demoted_node)
8293 self._WaitUntilSync()
8294 except errors.OpExecError:
8295 # we ignore here errors, since if the device is standalone, it
8296 # won't be able to sync
8298 self._GoStandalone()
8299 self._GoReconnect(False)
8300 self._WaitUntilSync()
8302 self.feedback_fn("* done")
8304 def _RevertDiskStatus(self):
8305 """Try to revert the disk status after a failed migration.
8308 target_node = self.target_node
8309 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8313 self._EnsureSecondary(target_node)
8314 self._GoStandalone()
8315 self._GoReconnect(False)
8316 self._WaitUntilSync()
8317 except errors.OpExecError, err:
8318 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8319 " please try to recover the instance manually;"
8320 " error '%s'" % str(err))
8322 def _AbortMigration(self):
8323 """Call the hypervisor code to abort a started migration.
8326 instance = self.instance
8327 target_node = self.target_node
8328 source_node = self.source_node
8329 migration_info = self.migration_info
8331 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8335 abort_msg = abort_result.fail_msg
8337 logging.error("Aborting migration failed on target node %s: %s",
8338 target_node, abort_msg)
8339 # Don't raise an exception here, as we stil have to try to revert the
8340 # disk status, even if this step failed.
8342 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8343 instance, False, self.live)
8344 abort_msg = abort_result.fail_msg
8346 logging.error("Aborting migration failed on source node %s: %s",
8347 source_node, abort_msg)
8349 def _ExecMigration(self):
8350 """Migrate an instance.
8352 The migrate is done by:
8353 - change the disks into dual-master mode
8354 - wait until disks are fully synchronized again
8355 - migrate the instance
8356 - change disks on the new secondary node (the old primary) to secondary
8357 - wait until disks are fully synchronized
8358 - change disks into single-master mode
8361 instance = self.instance
8362 target_node = self.target_node
8363 source_node = self.source_node
8365 # Check for hypervisor version mismatch and warn the user.
8366 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8367 None, [self.instance.hypervisor])
8368 for ninfo in nodeinfo.values():
8369 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8371 (_, _, (src_info, )) = nodeinfo[source_node].payload
8372 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8374 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8375 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8376 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8377 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8378 if src_version != dst_version:
8379 self.feedback_fn("* warning: hypervisor version mismatch between"
8380 " source (%s) and target (%s) node" %
8381 (src_version, dst_version))
8383 self.feedback_fn("* checking disk consistency between source and target")
8384 for (idx, dev) in enumerate(instance.disks):
8385 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8386 raise errors.OpExecError("Disk %s is degraded or not fully"
8387 " synchronized on target node,"
8388 " aborting migration" % idx)
8390 if self.current_mem > self.tgt_free_mem:
8391 if not self.allow_runtime_changes:
8392 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8393 " free memory to fit instance %s on target"
8394 " node %s (have %dMB, need %dMB)" %
8395 (instance.name, target_node,
8396 self.tgt_free_mem, self.current_mem))
8397 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8398 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8401 rpcres.Raise("Cannot modify instance runtime memory")
8403 # First get the migration information from the remote node
8404 result = self.rpc.call_migration_info(source_node, instance)
8405 msg = result.fail_msg
8407 log_err = ("Failed fetching source migration information from %s: %s" %
8409 logging.error(log_err)
8410 raise errors.OpExecError(log_err)
8412 self.migration_info = migration_info = result.payload
8414 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8415 # Then switch the disks to master/master mode
8416 self._EnsureSecondary(target_node)
8417 self._GoStandalone()
8418 self._GoReconnect(True)
8419 self._WaitUntilSync()
8421 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8422 result = self.rpc.call_accept_instance(target_node,
8425 self.nodes_ip[target_node])
8427 msg = result.fail_msg
8429 logging.error("Instance pre-migration failed, trying to revert"
8430 " disk status: %s", msg)
8431 self.feedback_fn("Pre-migration failed, aborting")
8432 self._AbortMigration()
8433 self._RevertDiskStatus()
8434 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8435 (instance.name, msg))
8437 self.feedback_fn("* migrating instance to %s" % target_node)
8438 result = self.rpc.call_instance_migrate(source_node, instance,
8439 self.nodes_ip[target_node],
8441 msg = result.fail_msg
8443 logging.error("Instance migration failed, trying to revert"
8444 " disk status: %s", msg)
8445 self.feedback_fn("Migration failed, aborting")
8446 self._AbortMigration()
8447 self._RevertDiskStatus()
8448 raise errors.OpExecError("Could not migrate instance %s: %s" %
8449 (instance.name, msg))
8451 self.feedback_fn("* starting memory transfer")
8452 last_feedback = time.time()
8454 result = self.rpc.call_instance_get_migration_status(source_node,
8456 msg = result.fail_msg
8457 ms = result.payload # MigrationStatus instance
8458 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8459 logging.error("Instance migration failed, trying to revert"
8460 " disk status: %s", msg)
8461 self.feedback_fn("Migration failed, aborting")
8462 self._AbortMigration()
8463 self._RevertDiskStatus()
8464 raise errors.OpExecError("Could not migrate instance %s: %s" %
8465 (instance.name, msg))
8467 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8468 self.feedback_fn("* memory transfer complete")
8471 if (utils.TimeoutExpired(last_feedback,
8472 self._MIGRATION_FEEDBACK_INTERVAL) and
8473 ms.transferred_ram is not None):
8474 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8475 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8476 last_feedback = time.time()
8478 time.sleep(self._MIGRATION_POLL_INTERVAL)
8480 result = self.rpc.call_instance_finalize_migration_src(source_node,
8484 msg = result.fail_msg
8486 logging.error("Instance migration succeeded, but finalization failed"
8487 " on the source node: %s", msg)
8488 raise errors.OpExecError("Could not finalize instance migration: %s" %
8491 instance.primary_node = target_node
8493 # distribute new instance config to the other nodes
8494 self.cfg.Update(instance, self.feedback_fn)
8496 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8500 msg = result.fail_msg
8502 logging.error("Instance migration succeeded, but finalization failed"
8503 " on the target node: %s", msg)
8504 raise errors.OpExecError("Could not finalize instance migration: %s" %
8507 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8508 self._EnsureSecondary(source_node)
8509 self._WaitUntilSync()
8510 self._GoStandalone()
8511 self._GoReconnect(False)
8512 self._WaitUntilSync()
8514 # If the instance's disk template is `rbd' and there was a successful
8515 # migration, unmap the device from the source node.
8516 if self.instance.disk_template == constants.DT_RBD:
8517 disks = _ExpandCheckDisks(instance, instance.disks)
8518 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8520 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8521 msg = result.fail_msg
8523 logging.error("Migration was successful, but couldn't unmap the"
8524 " block device %s on source node %s: %s",
8525 disk.iv_name, source_node, msg)
8526 logging.error("You need to unmap the device %s manually on %s",
8527 disk.iv_name, source_node)
8529 self.feedback_fn("* done")
8531 def _ExecFailover(self):
8532 """Failover an instance.
8534 The failover is done by shutting it down on its present node and
8535 starting it on the secondary.
8538 instance = self.instance
8539 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8541 source_node = instance.primary_node
8542 target_node = self.target_node
8544 if instance.admin_state == constants.ADMINST_UP:
8545 self.feedback_fn("* checking disk consistency between source and target")
8546 for (idx, dev) in enumerate(instance.disks):
8547 # for drbd, these are drbd over lvm
8548 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8550 if primary_node.offline:
8551 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8553 (primary_node.name, idx, target_node))
8554 elif not self.ignore_consistency:
8555 raise errors.OpExecError("Disk %s is degraded on target node,"
8556 " aborting failover" % idx)
8558 self.feedback_fn("* not checking disk consistency as instance is not"
8561 self.feedback_fn("* shutting down instance on source node")
8562 logging.info("Shutting down instance %s on node %s",
8563 instance.name, source_node)
8565 result = self.rpc.call_instance_shutdown(source_node, instance,
8566 self.shutdown_timeout)
8567 msg = result.fail_msg
8569 if self.ignore_consistency or primary_node.offline:
8570 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8571 " proceeding anyway; please make sure node"
8572 " %s is down; error details: %s",
8573 instance.name, source_node, source_node, msg)
8575 raise errors.OpExecError("Could not shutdown instance %s on"
8577 (instance.name, source_node, msg))
8579 self.feedback_fn("* deactivating the instance's disks on source node")
8580 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8581 raise errors.OpExecError("Can't shut down the instance's disks")
8583 instance.primary_node = target_node
8584 # distribute new instance config to the other nodes
8585 self.cfg.Update(instance, self.feedback_fn)
8587 # Only start the instance if it's marked as up
8588 if instance.admin_state == constants.ADMINST_UP:
8589 self.feedback_fn("* activating the instance's disks on target node %s" %
8591 logging.info("Starting instance %s on node %s",
8592 instance.name, target_node)
8594 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8595 ignore_secondaries=True)
8597 _ShutdownInstanceDisks(self.lu, instance)
8598 raise errors.OpExecError("Can't activate the instance's disks")
8600 self.feedback_fn("* starting the instance on the target node %s" %
8602 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8604 msg = result.fail_msg
8606 _ShutdownInstanceDisks(self.lu, instance)
8607 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8608 (instance.name, target_node, msg))
8610 def Exec(self, feedback_fn):
8611 """Perform the migration.
8614 self.feedback_fn = feedback_fn
8615 self.source_node = self.instance.primary_node
8617 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8618 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8619 self.target_node = self.instance.secondary_nodes[0]
8620 # Otherwise self.target_node has been populated either
8621 # directly, or through an iallocator.
8623 self.all_nodes = [self.source_node, self.target_node]
8624 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8625 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8628 feedback_fn("Failover instance %s" % self.instance.name)
8629 self._ExecFailover()
8631 feedback_fn("Migrating instance %s" % self.instance.name)
8634 return self._ExecCleanup()
8636 return self._ExecMigration()
8639 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8641 """Wrapper around L{_CreateBlockDevInner}.
8643 This method annotates the root device first.
8646 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8647 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8651 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8653 """Create a tree of block devices on a given node.
8655 If this device type has to be created on secondaries, create it and
8658 If not, just recurse to children keeping the same 'force' value.
8660 @attention: The device has to be annotated already.
8662 @param lu: the lu on whose behalf we execute
8663 @param node: the node on which to create the device
8664 @type instance: L{objects.Instance}
8665 @param instance: the instance which owns the device
8666 @type device: L{objects.Disk}
8667 @param device: the device to create
8668 @type force_create: boolean
8669 @param force_create: whether to force creation of this device; this
8670 will be change to True whenever we find a device which has
8671 CreateOnSecondary() attribute
8672 @param info: the extra 'metadata' we should attach to the device
8673 (this will be represented as a LVM tag)
8674 @type force_open: boolean
8675 @param force_open: this parameter will be passes to the
8676 L{backend.BlockdevCreate} function where it specifies
8677 whether we run on primary or not, and it affects both
8678 the child assembly and the device own Open() execution
8681 if device.CreateOnSecondary():
8685 for child in device.children:
8686 _CreateBlockDevInner(lu, node, instance, child, force_create,
8689 if not force_create:
8692 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8695 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8696 """Create a single block device on a given node.
8698 This will not recurse over children of the device, so they must be
8701 @param lu: the lu on whose behalf we execute
8702 @param node: the node on which to create the device
8703 @type instance: L{objects.Instance}
8704 @param instance: the instance which owns the device
8705 @type device: L{objects.Disk}
8706 @param device: the device to create
8707 @param info: the extra 'metadata' we should attach to the device
8708 (this will be represented as a LVM tag)
8709 @type force_open: boolean
8710 @param force_open: this parameter will be passes to the
8711 L{backend.BlockdevCreate} function where it specifies
8712 whether we run on primary or not, and it affects both
8713 the child assembly and the device own Open() execution
8716 lu.cfg.SetDiskID(device, node)
8717 result = lu.rpc.call_blockdev_create(node, device, device.size,
8718 instance.name, force_open, info)
8719 result.Raise("Can't create block device %s on"
8720 " node %s for instance %s" % (device, node, instance.name))
8721 if device.physical_id is None:
8722 device.physical_id = result.payload
8725 def _GenerateUniqueNames(lu, exts):
8726 """Generate a suitable LV name.
8728 This will generate a logical volume name for the given instance.
8733 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8734 results.append("%s%s" % (new_id, val))
8738 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8739 iv_name, p_minor, s_minor):
8740 """Generate a drbd8 device complete with its children.
8743 assert len(vgnames) == len(names) == 2
8744 port = lu.cfg.AllocatePort()
8745 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8747 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8748 logical_id=(vgnames[0], names[0]),
8750 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8751 logical_id=(vgnames[1], names[1]),
8753 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8754 logical_id=(primary, secondary, port,
8757 children=[dev_data, dev_meta],
8758 iv_name=iv_name, params={})
8762 _DISK_TEMPLATE_NAME_PREFIX = {
8763 constants.DT_PLAIN: "",
8764 constants.DT_RBD: ".rbd",
8768 _DISK_TEMPLATE_DEVICE_TYPE = {
8769 constants.DT_PLAIN: constants.LD_LV,
8770 constants.DT_FILE: constants.LD_FILE,
8771 constants.DT_SHARED_FILE: constants.LD_FILE,
8772 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8773 constants.DT_RBD: constants.LD_RBD,
8777 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8778 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8779 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8780 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8781 """Generate the entire disk layout for a given template type.
8784 #TODO: compute space requirements
8786 vgname = lu.cfg.GetVGName()
8787 disk_count = len(disk_info)
8790 if template_name == constants.DT_DISKLESS:
8792 elif template_name == constants.DT_DRBD8:
8793 if len(secondary_nodes) != 1:
8794 raise errors.ProgrammerError("Wrong template configuration")
8795 remote_node = secondary_nodes[0]
8796 minors = lu.cfg.AllocateDRBDMinor(
8797 [primary_node, remote_node] * len(disk_info), instance_name)
8799 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8801 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8804 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8805 for i in range(disk_count)]):
8806 names.append(lv_prefix + "_data")
8807 names.append(lv_prefix + "_meta")
8808 for idx, disk in enumerate(disk_info):
8809 disk_index = idx + base_index
8810 data_vg = disk.get(constants.IDISK_VG, vgname)
8811 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8812 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8813 disk[constants.IDISK_SIZE],
8815 names[idx * 2:idx * 2 + 2],
8816 "disk/%d" % disk_index,
8817 minors[idx * 2], minors[idx * 2 + 1])
8818 disk_dev.mode = disk[constants.IDISK_MODE]
8819 disks.append(disk_dev)
8822 raise errors.ProgrammerError("Wrong template configuration")
8824 if template_name == constants.DT_FILE:
8826 elif template_name == constants.DT_SHARED_FILE:
8827 _req_shr_file_storage()
8829 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8830 if name_prefix is None:
8833 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8834 (name_prefix, base_index + i)
8835 for i in range(disk_count)])
8837 if template_name == constants.DT_PLAIN:
8838 def logical_id_fn(idx, _, disk):
8839 vg = disk.get(constants.IDISK_VG, vgname)
8840 return (vg, names[idx])
8841 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8843 lambda _, disk_index, disk: (file_driver,
8844 "%s/disk%d" % (file_storage_dir,
8846 elif template_name == constants.DT_BLOCK:
8848 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8849 disk[constants.IDISK_ADOPT])
8850 elif template_name == constants.DT_RBD:
8851 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8853 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8855 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8857 for idx, disk in enumerate(disk_info):
8858 disk_index = idx + base_index
8859 size = disk[constants.IDISK_SIZE]
8860 feedback_fn("* disk %s, size %s" %
8861 (disk_index, utils.FormatUnit(size, "h")))
8862 disks.append(objects.Disk(dev_type=dev_type, size=size,
8863 logical_id=logical_id_fn(idx, disk_index, disk),
8864 iv_name="disk/%d" % disk_index,
8865 mode=disk[constants.IDISK_MODE],
8871 def _GetInstanceInfoText(instance):
8872 """Compute that text that should be added to the disk's metadata.
8875 return "originstname+%s" % instance.name
8878 def _CalcEta(time_taken, written, total_size):
8879 """Calculates the ETA based on size written and total size.
8881 @param time_taken: The time taken so far
8882 @param written: amount written so far
8883 @param total_size: The total size of data to be written
8884 @return: The remaining time in seconds
8887 avg_time = time_taken / float(written)
8888 return (total_size - written) * avg_time
8891 def _WipeDisks(lu, instance):
8892 """Wipes instance disks.
8894 @type lu: L{LogicalUnit}
8895 @param lu: the logical unit on whose behalf we execute
8896 @type instance: L{objects.Instance}
8897 @param instance: the instance whose disks we should create
8898 @return: the success of the wipe
8901 node = instance.primary_node
8903 for device in instance.disks:
8904 lu.cfg.SetDiskID(device, node)
8906 logging.info("Pause sync of instance %s disks", instance.name)
8907 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8908 (instance.disks, instance),
8911 for idx, success in enumerate(result.payload):
8913 logging.warn("pause-sync of instance %s for disks %d failed",
8917 for idx, device in enumerate(instance.disks):
8918 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8919 # MAX_WIPE_CHUNK at max
8920 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8921 constants.MIN_WIPE_CHUNK_PERCENT)
8922 # we _must_ make this an int, otherwise rounding errors will
8924 wipe_chunk_size = int(wipe_chunk_size)
8926 lu.LogInfo("* Wiping disk %d", idx)
8927 logging.info("Wiping disk %d for instance %s, node %s using"
8928 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8933 start_time = time.time()
8935 while offset < size:
8936 wipe_size = min(wipe_chunk_size, size - offset)
8937 logging.debug("Wiping disk %d, offset %s, chunk %s",
8938 idx, offset, wipe_size)
8939 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8941 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8942 (idx, offset, wipe_size))
8945 if now - last_output >= 60:
8946 eta = _CalcEta(now - start_time, offset, size)
8947 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8948 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8951 logging.info("Resume sync of instance %s disks", instance.name)
8953 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8954 (instance.disks, instance),
8957 for idx, success in enumerate(result.payload):
8959 lu.LogWarning("Resume sync of disk %d failed, please have a"
8960 " look at the status and troubleshoot the issue", idx)
8961 logging.warn("resume-sync of instance %s for disks %d failed",
8965 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8966 """Create all disks for an instance.
8968 This abstracts away some work from AddInstance.
8970 @type lu: L{LogicalUnit}
8971 @param lu: the logical unit on whose behalf we execute
8972 @type instance: L{objects.Instance}
8973 @param instance: the instance whose disks we should create
8975 @param to_skip: list of indices to skip
8976 @type target_node: string
8977 @param target_node: if passed, overrides the target node for creation
8979 @return: the success of the creation
8982 info = _GetInstanceInfoText(instance)
8983 if target_node is None:
8984 pnode = instance.primary_node
8985 all_nodes = instance.all_nodes
8990 if instance.disk_template in constants.DTS_FILEBASED:
8991 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8992 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8994 result.Raise("Failed to create directory '%s' on"
8995 " node %s" % (file_storage_dir, pnode))
8997 # Note: this needs to be kept in sync with adding of disks in
8998 # LUInstanceSetParams
8999 for idx, device in enumerate(instance.disks):
9000 if to_skip and idx in to_skip:
9002 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9004 for node in all_nodes:
9005 f_create = node == pnode
9006 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9009 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9010 """Remove all disks for an instance.
9012 This abstracts away some work from `AddInstance()` and
9013 `RemoveInstance()`. Note that in case some of the devices couldn't
9014 be removed, the removal will continue with the other ones (compare
9015 with `_CreateDisks()`).
9017 @type lu: L{LogicalUnit}
9018 @param lu: the logical unit on whose behalf we execute
9019 @type instance: L{objects.Instance}
9020 @param instance: the instance whose disks we should remove
9021 @type target_node: string
9022 @param target_node: used to override the node on which to remove the disks
9024 @return: the success of the removal
9027 logging.info("Removing block devices for instance %s", instance.name)
9030 ports_to_release = set()
9031 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9032 for (idx, device) in enumerate(anno_disks):
9034 edata = [(target_node, device)]
9036 edata = device.ComputeNodeTree(instance.primary_node)
9037 for node, disk in edata:
9038 lu.cfg.SetDiskID(disk, node)
9039 result = lu.rpc.call_blockdev_remove(node, disk)
9041 lu.LogWarning("Could not remove disk %s on node %s,"
9042 " continuing anyway: %s", idx, node, result.fail_msg)
9043 if not (result.offline and node != instance.primary_node):
9046 # if this is a DRBD disk, return its port to the pool
9047 if device.dev_type in constants.LDS_DRBD:
9048 ports_to_release.add(device.logical_id[2])
9050 if all_result or ignore_failures:
9051 for port in ports_to_release:
9052 lu.cfg.AddTcpUdpPort(port)
9054 if instance.disk_template == constants.DT_FILE:
9055 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9059 tgt = instance.primary_node
9060 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9062 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9063 file_storage_dir, instance.primary_node, result.fail_msg)
9069 def _ComputeDiskSizePerVG(disk_template, disks):
9070 """Compute disk size requirements in the volume group
9073 def _compute(disks, payload):
9074 """Universal algorithm.
9079 vgs[disk[constants.IDISK_VG]] = \
9080 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9084 # Required free disk space as a function of disk and swap space
9086 constants.DT_DISKLESS: {},
9087 constants.DT_PLAIN: _compute(disks, 0),
9088 # 128 MB are added for drbd metadata for each disk
9089 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9090 constants.DT_FILE: {},
9091 constants.DT_SHARED_FILE: {},
9094 if disk_template not in req_size_dict:
9095 raise errors.ProgrammerError("Disk template '%s' size requirement"
9096 " is unknown" % disk_template)
9098 return req_size_dict[disk_template]
9101 def _ComputeDiskSize(disk_template, disks):
9102 """Compute disk size requirements in the volume group
9105 # Required free disk space as a function of disk and swap space
9107 constants.DT_DISKLESS: None,
9108 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9109 # 128 MB are added for drbd metadata for each disk
9111 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9112 constants.DT_FILE: None,
9113 constants.DT_SHARED_FILE: 0,
9114 constants.DT_BLOCK: 0,
9115 constants.DT_RBD: 0,
9118 if disk_template not in req_size_dict:
9119 raise errors.ProgrammerError("Disk template '%s' size requirement"
9120 " is unknown" % disk_template)
9122 return req_size_dict[disk_template]
9125 def _FilterVmNodes(lu, nodenames):
9126 """Filters out non-vm_capable nodes from a list.
9128 @type lu: L{LogicalUnit}
9129 @param lu: the logical unit for which we check
9130 @type nodenames: list
9131 @param nodenames: the list of nodes on which we should check
9133 @return: the list of vm-capable nodes
9136 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9137 return [name for name in nodenames if name not in vm_nodes]
9140 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9141 """Hypervisor parameter validation.
9143 This function abstract the hypervisor parameter validation to be
9144 used in both instance create and instance modify.
9146 @type lu: L{LogicalUnit}
9147 @param lu: the logical unit for which we check
9148 @type nodenames: list
9149 @param nodenames: the list of nodes on which we should check
9150 @type hvname: string
9151 @param hvname: the name of the hypervisor we should use
9152 @type hvparams: dict
9153 @param hvparams: the parameters which we need to check
9154 @raise errors.OpPrereqError: if the parameters are not valid
9157 nodenames = _FilterVmNodes(lu, nodenames)
9159 cluster = lu.cfg.GetClusterInfo()
9160 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9162 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9163 for node in nodenames:
9167 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9170 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9171 """OS parameters validation.
9173 @type lu: L{LogicalUnit}
9174 @param lu: the logical unit for which we check
9175 @type required: boolean
9176 @param required: whether the validation should fail if the OS is not
9178 @type nodenames: list
9179 @param nodenames: the list of nodes on which we should check
9180 @type osname: string
9181 @param osname: the name of the hypervisor we should use
9182 @type osparams: dict
9183 @param osparams: the parameters which we need to check
9184 @raise errors.OpPrereqError: if the parameters are not valid
9187 nodenames = _FilterVmNodes(lu, nodenames)
9188 result = lu.rpc.call_os_validate(nodenames, required, osname,
9189 [constants.OS_VALIDATE_PARAMETERS],
9191 for node, nres in result.items():
9192 # we don't check for offline cases since this should be run only
9193 # against the master node and/or an instance's nodes
9194 nres.Raise("OS Parameters validation failed on node %s" % node)
9195 if not nres.payload:
9196 lu.LogInfo("OS %s not found on node %s, validation skipped",
9200 class LUInstanceCreate(LogicalUnit):
9201 """Create an instance.
9204 HPATH = "instance-add"
9205 HTYPE = constants.HTYPE_INSTANCE
9208 def CheckArguments(self):
9212 # do not require name_check to ease forward/backward compatibility
9214 if self.op.no_install and self.op.start:
9215 self.LogInfo("No-installation mode selected, disabling startup")
9216 self.op.start = False
9217 # validate/normalize the instance name
9218 self.op.instance_name = \
9219 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9221 if self.op.ip_check and not self.op.name_check:
9222 # TODO: make the ip check more flexible and not depend on the name check
9223 raise errors.OpPrereqError("Cannot do IP address check without a name"
9224 " check", errors.ECODE_INVAL)
9226 # check nics' parameter names
9227 for nic in self.op.nics:
9228 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9230 # check disks. parameter names and consistent adopt/no-adopt strategy
9231 has_adopt = has_no_adopt = False
9232 for disk in self.op.disks:
9233 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9234 if constants.IDISK_ADOPT in disk:
9238 if has_adopt and has_no_adopt:
9239 raise errors.OpPrereqError("Either all disks are adopted or none is",
9242 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9243 raise errors.OpPrereqError("Disk adoption is not supported for the"
9244 " '%s' disk template" %
9245 self.op.disk_template,
9247 if self.op.iallocator is not None:
9248 raise errors.OpPrereqError("Disk adoption not allowed with an"
9249 " iallocator script", errors.ECODE_INVAL)
9250 if self.op.mode == constants.INSTANCE_IMPORT:
9251 raise errors.OpPrereqError("Disk adoption not allowed for"
9252 " instance import", errors.ECODE_INVAL)
9254 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9255 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9256 " but no 'adopt' parameter given" %
9257 self.op.disk_template,
9260 self.adopt_disks = has_adopt
9262 # instance name verification
9263 if self.op.name_check:
9264 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9265 self.op.instance_name = self.hostname1.name
9266 # used in CheckPrereq for ip ping check
9267 self.check_ip = self.hostname1.ip
9269 self.check_ip = None
9271 # file storage checks
9272 if (self.op.file_driver and
9273 not self.op.file_driver in constants.FILE_DRIVER):
9274 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9275 self.op.file_driver, errors.ECODE_INVAL)
9277 if self.op.disk_template == constants.DT_FILE:
9278 opcodes.RequireFileStorage()
9279 elif self.op.disk_template == constants.DT_SHARED_FILE:
9280 opcodes.RequireSharedFileStorage()
9282 ### Node/iallocator related checks
9283 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9285 if self.op.pnode is not None:
9286 if self.op.disk_template in constants.DTS_INT_MIRROR:
9287 if self.op.snode is None:
9288 raise errors.OpPrereqError("The networked disk templates need"
9289 " a mirror node", errors.ECODE_INVAL)
9291 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9293 self.op.snode = None
9295 self._cds = _GetClusterDomainSecret()
9297 if self.op.mode == constants.INSTANCE_IMPORT:
9298 # On import force_variant must be True, because if we forced it at
9299 # initial install, our only chance when importing it back is that it
9301 self.op.force_variant = True
9303 if self.op.no_install:
9304 self.LogInfo("No-installation mode has no effect during import")
9306 elif self.op.mode == constants.INSTANCE_CREATE:
9307 if self.op.os_type is None:
9308 raise errors.OpPrereqError("No guest OS specified",
9310 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9311 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9312 " installation" % self.op.os_type,
9314 if self.op.disk_template is None:
9315 raise errors.OpPrereqError("No disk template specified",
9318 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9319 # Check handshake to ensure both clusters have the same domain secret
9320 src_handshake = self.op.source_handshake
9321 if not src_handshake:
9322 raise errors.OpPrereqError("Missing source handshake",
9325 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9328 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9331 # Load and check source CA
9332 self.source_x509_ca_pem = self.op.source_x509_ca
9333 if not self.source_x509_ca_pem:
9334 raise errors.OpPrereqError("Missing source X509 CA",
9338 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9340 except OpenSSL.crypto.Error, err:
9341 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9342 (err, ), errors.ECODE_INVAL)
9344 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9345 if errcode is not None:
9346 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9349 self.source_x509_ca = cert
9351 src_instance_name = self.op.source_instance_name
9352 if not src_instance_name:
9353 raise errors.OpPrereqError("Missing source instance name",
9356 self.source_instance_name = \
9357 netutils.GetHostname(name=src_instance_name).name
9360 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9361 self.op.mode, errors.ECODE_INVAL)
9363 def ExpandNames(self):
9364 """ExpandNames for CreateInstance.
9366 Figure out the right locks for instance creation.
9369 self.needed_locks = {}
9371 instance_name = self.op.instance_name
9372 # this is just a preventive check, but someone might still add this
9373 # instance in the meantime, and creation will fail at lock-add time
9374 if instance_name in self.cfg.GetInstanceList():
9375 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9376 instance_name, errors.ECODE_EXISTS)
9378 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9380 if self.op.iallocator:
9381 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9382 # specifying a group on instance creation and then selecting nodes from
9384 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9385 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9387 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9388 nodelist = [self.op.pnode]
9389 if self.op.snode is not None:
9390 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9391 nodelist.append(self.op.snode)
9392 self.needed_locks[locking.LEVEL_NODE] = nodelist
9393 # Lock resources of instance's primary and secondary nodes (copy to
9394 # prevent accidential modification)
9395 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9397 # in case of import lock the source node too
9398 if self.op.mode == constants.INSTANCE_IMPORT:
9399 src_node = self.op.src_node
9400 src_path = self.op.src_path
9402 if src_path is None:
9403 self.op.src_path = src_path = self.op.instance_name
9405 if src_node is None:
9406 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9407 self.op.src_node = None
9408 if os.path.isabs(src_path):
9409 raise errors.OpPrereqError("Importing an instance from a path"
9410 " requires a source node option",
9413 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9414 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9415 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9416 if not os.path.isabs(src_path):
9417 self.op.src_path = src_path = \
9418 utils.PathJoin(constants.EXPORT_DIR, src_path)
9420 def _RunAllocator(self):
9421 """Run the allocator based on input opcode.
9424 nics = [n.ToDict() for n in self.nics]
9425 ial = IAllocator(self.cfg, self.rpc,
9426 mode=constants.IALLOCATOR_MODE_ALLOC,
9427 name=self.op.instance_name,
9428 disk_template=self.op.disk_template,
9431 vcpus=self.be_full[constants.BE_VCPUS],
9432 memory=self.be_full[constants.BE_MAXMEM],
9433 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9436 hypervisor=self.op.hypervisor,
9439 ial.Run(self.op.iallocator)
9442 raise errors.OpPrereqError("Can't compute nodes using"
9443 " iallocator '%s': %s" %
9444 (self.op.iallocator, ial.info),
9446 if len(ial.result) != ial.required_nodes:
9447 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9448 " of nodes (%s), required %s" %
9449 (self.op.iallocator, len(ial.result),
9450 ial.required_nodes), errors.ECODE_FAULT)
9451 self.op.pnode = ial.result[0]
9452 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9453 self.op.instance_name, self.op.iallocator,
9454 utils.CommaJoin(ial.result))
9455 if ial.required_nodes == 2:
9456 self.op.snode = ial.result[1]
9458 def BuildHooksEnv(self):
9461 This runs on master, primary and secondary nodes of the instance.
9465 "ADD_MODE": self.op.mode,
9467 if self.op.mode == constants.INSTANCE_IMPORT:
9468 env["SRC_NODE"] = self.op.src_node
9469 env["SRC_PATH"] = self.op.src_path
9470 env["SRC_IMAGES"] = self.src_images
9472 env.update(_BuildInstanceHookEnv(
9473 name=self.op.instance_name,
9474 primary_node=self.op.pnode,
9475 secondary_nodes=self.secondaries,
9476 status=self.op.start,
9477 os_type=self.op.os_type,
9478 minmem=self.be_full[constants.BE_MINMEM],
9479 maxmem=self.be_full[constants.BE_MAXMEM],
9480 vcpus=self.be_full[constants.BE_VCPUS],
9481 nics=_NICListToTuple(self, self.nics),
9482 disk_template=self.op.disk_template,
9483 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9484 for d in self.disks],
9487 hypervisor_name=self.op.hypervisor,
9493 def BuildHooksNodes(self):
9494 """Build hooks nodes.
9497 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9500 def _ReadExportInfo(self):
9501 """Reads the export information from disk.
9503 It will override the opcode source node and path with the actual
9504 information, if these two were not specified before.
9506 @return: the export information
9509 assert self.op.mode == constants.INSTANCE_IMPORT
9511 src_node = self.op.src_node
9512 src_path = self.op.src_path
9514 if src_node is None:
9515 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9516 exp_list = self.rpc.call_export_list(locked_nodes)
9518 for node in exp_list:
9519 if exp_list[node].fail_msg:
9521 if src_path in exp_list[node].payload:
9523 self.op.src_node = src_node = node
9524 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9528 raise errors.OpPrereqError("No export found for relative path %s" %
9529 src_path, errors.ECODE_INVAL)
9531 _CheckNodeOnline(self, src_node)
9532 result = self.rpc.call_export_info(src_node, src_path)
9533 result.Raise("No export or invalid export found in dir %s" % src_path)
9535 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9536 if not export_info.has_section(constants.INISECT_EXP):
9537 raise errors.ProgrammerError("Corrupted export config",
9538 errors.ECODE_ENVIRON)
9540 ei_version = export_info.get(constants.INISECT_EXP, "version")
9541 if (int(ei_version) != constants.EXPORT_VERSION):
9542 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9543 (ei_version, constants.EXPORT_VERSION),
9544 errors.ECODE_ENVIRON)
9547 def _ReadExportParams(self, einfo):
9548 """Use export parameters as defaults.
9550 In case the opcode doesn't specify (as in override) some instance
9551 parameters, then try to use them from the export information, if
9555 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9557 if self.op.disk_template is None:
9558 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9559 self.op.disk_template = einfo.get(constants.INISECT_INS,
9561 if self.op.disk_template not in constants.DISK_TEMPLATES:
9562 raise errors.OpPrereqError("Disk template specified in configuration"
9563 " file is not one of the allowed values:"
9564 " %s" % " ".join(constants.DISK_TEMPLATES))
9566 raise errors.OpPrereqError("No disk template specified and the export"
9567 " is missing the disk_template information",
9570 if not self.op.disks:
9572 # TODO: import the disk iv_name too
9573 for idx in range(constants.MAX_DISKS):
9574 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9575 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9576 disks.append({constants.IDISK_SIZE: disk_sz})
9577 self.op.disks = disks
9578 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9579 raise errors.OpPrereqError("No disk info specified and the export"
9580 " is missing the disk information",
9583 if not self.op.nics:
9585 for idx in range(constants.MAX_NICS):
9586 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9588 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9589 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9596 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9597 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9599 if (self.op.hypervisor is None and
9600 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9601 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9603 if einfo.has_section(constants.INISECT_HYP):
9604 # use the export parameters but do not override the ones
9605 # specified by the user
9606 for name, value in einfo.items(constants.INISECT_HYP):
9607 if name not in self.op.hvparams:
9608 self.op.hvparams[name] = value
9610 if einfo.has_section(constants.INISECT_BEP):
9611 # use the parameters, without overriding
9612 for name, value in einfo.items(constants.INISECT_BEP):
9613 if name not in self.op.beparams:
9614 self.op.beparams[name] = value
9615 # Compatibility for the old "memory" be param
9616 if name == constants.BE_MEMORY:
9617 if constants.BE_MAXMEM not in self.op.beparams:
9618 self.op.beparams[constants.BE_MAXMEM] = value
9619 if constants.BE_MINMEM not in self.op.beparams:
9620 self.op.beparams[constants.BE_MINMEM] = value
9622 # try to read the parameters old style, from the main section
9623 for name in constants.BES_PARAMETERS:
9624 if (name not in self.op.beparams and
9625 einfo.has_option(constants.INISECT_INS, name)):
9626 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9628 if einfo.has_section(constants.INISECT_OSP):
9629 # use the parameters, without overriding
9630 for name, value in einfo.items(constants.INISECT_OSP):
9631 if name not in self.op.osparams:
9632 self.op.osparams[name] = value
9634 def _RevertToDefaults(self, cluster):
9635 """Revert the instance parameters to the default values.
9639 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9640 for name in self.op.hvparams.keys():
9641 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9642 del self.op.hvparams[name]
9644 be_defs = cluster.SimpleFillBE({})
9645 for name in self.op.beparams.keys():
9646 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9647 del self.op.beparams[name]
9649 nic_defs = cluster.SimpleFillNIC({})
9650 for nic in self.op.nics:
9651 for name in constants.NICS_PARAMETERS:
9652 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9655 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9656 for name in self.op.osparams.keys():
9657 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9658 del self.op.osparams[name]
9660 def _CalculateFileStorageDir(self):
9661 """Calculate final instance file storage dir.
9664 # file storage dir calculation/check
9665 self.instance_file_storage_dir = None
9666 if self.op.disk_template in constants.DTS_FILEBASED:
9667 # build the full file storage dir path
9670 if self.op.disk_template == constants.DT_SHARED_FILE:
9671 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9673 get_fsd_fn = self.cfg.GetFileStorageDir
9675 cfg_storagedir = get_fsd_fn()
9676 if not cfg_storagedir:
9677 raise errors.OpPrereqError("Cluster file storage dir not defined")
9678 joinargs.append(cfg_storagedir)
9680 if self.op.file_storage_dir is not None:
9681 joinargs.append(self.op.file_storage_dir)
9683 joinargs.append(self.op.instance_name)
9685 # pylint: disable=W0142
9686 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9688 def CheckPrereq(self): # pylint: disable=R0914
9689 """Check prerequisites.
9692 self._CalculateFileStorageDir()
9694 if self.op.mode == constants.INSTANCE_IMPORT:
9695 export_info = self._ReadExportInfo()
9696 self._ReadExportParams(export_info)
9697 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9699 self._old_instance_name = None
9701 if (not self.cfg.GetVGName() and
9702 self.op.disk_template not in constants.DTS_NOT_LVM):
9703 raise errors.OpPrereqError("Cluster does not support lvm-based"
9704 " instances", errors.ECODE_STATE)
9706 if (self.op.hypervisor is None or
9707 self.op.hypervisor == constants.VALUE_AUTO):
9708 self.op.hypervisor = self.cfg.GetHypervisorType()
9710 cluster = self.cfg.GetClusterInfo()
9711 enabled_hvs = cluster.enabled_hypervisors
9712 if self.op.hypervisor not in enabled_hvs:
9713 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9714 " cluster (%s)" % (self.op.hypervisor,
9715 ",".join(enabled_hvs)),
9718 # Check tag validity
9719 for tag in self.op.tags:
9720 objects.TaggableObject.ValidateTag(tag)
9722 # check hypervisor parameter syntax (locally)
9723 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9724 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9726 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9727 hv_type.CheckParameterSyntax(filled_hvp)
9728 self.hv_full = filled_hvp
9729 # check that we don't specify global parameters on an instance
9730 _CheckGlobalHvParams(self.op.hvparams)
9732 # fill and remember the beparams dict
9733 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9734 for param, value in self.op.beparams.iteritems():
9735 if value == constants.VALUE_AUTO:
9736 self.op.beparams[param] = default_beparams[param]
9737 objects.UpgradeBeParams(self.op.beparams)
9738 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9739 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9741 # build os parameters
9742 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9744 # now that hvp/bep are in final format, let's reset to defaults,
9746 if self.op.identify_defaults:
9747 self._RevertToDefaults(cluster)
9751 for idx, nic in enumerate(self.op.nics):
9752 nic_mode_req = nic.get(constants.INIC_MODE, None)
9753 nic_mode = nic_mode_req
9754 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9755 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9757 # in routed mode, for the first nic, the default ip is 'auto'
9758 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9759 default_ip_mode = constants.VALUE_AUTO
9761 default_ip_mode = constants.VALUE_NONE
9763 # ip validity checks
9764 ip = nic.get(constants.INIC_IP, default_ip_mode)
9765 if ip is None or ip.lower() == constants.VALUE_NONE:
9767 elif ip.lower() == constants.VALUE_AUTO:
9768 if not self.op.name_check:
9769 raise errors.OpPrereqError("IP address set to auto but name checks"
9770 " have been skipped",
9772 nic_ip = self.hostname1.ip
9774 if not netutils.IPAddress.IsValid(ip):
9775 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9779 # TODO: check the ip address for uniqueness
9780 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9781 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9784 # MAC address verification
9785 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9786 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9787 mac = utils.NormalizeAndValidateMac(mac)
9790 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9791 except errors.ReservationError:
9792 raise errors.OpPrereqError("MAC address %s already in use"
9793 " in cluster" % mac,
9794 errors.ECODE_NOTUNIQUE)
9796 # Build nic parameters
9797 link = nic.get(constants.INIC_LINK, None)
9798 if link == constants.VALUE_AUTO:
9799 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9802 nicparams[constants.NIC_MODE] = nic_mode
9804 nicparams[constants.NIC_LINK] = link
9806 check_params = cluster.SimpleFillNIC(nicparams)
9807 objects.NIC.CheckParameterSyntax(check_params)
9808 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9810 # disk checks/pre-build
9811 default_vg = self.cfg.GetVGName()
9813 for disk in self.op.disks:
9814 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9815 if mode not in constants.DISK_ACCESS_SET:
9816 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9817 mode, errors.ECODE_INVAL)
9818 size = disk.get(constants.IDISK_SIZE, None)
9820 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9823 except (TypeError, ValueError):
9824 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9827 data_vg = disk.get(constants.IDISK_VG, default_vg)
9829 constants.IDISK_SIZE: size,
9830 constants.IDISK_MODE: mode,
9831 constants.IDISK_VG: data_vg,
9833 if constants.IDISK_METAVG in disk:
9834 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9835 if constants.IDISK_ADOPT in disk:
9836 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9837 self.disks.append(new_disk)
9839 if self.op.mode == constants.INSTANCE_IMPORT:
9841 for idx in range(len(self.disks)):
9842 option = "disk%d_dump" % idx
9843 if export_info.has_option(constants.INISECT_INS, option):
9844 # FIXME: are the old os-es, disk sizes, etc. useful?
9845 export_name = export_info.get(constants.INISECT_INS, option)
9846 image = utils.PathJoin(self.op.src_path, export_name)
9847 disk_images.append(image)
9849 disk_images.append(False)
9851 self.src_images = disk_images
9853 if self.op.instance_name == self._old_instance_name:
9854 for idx, nic in enumerate(self.nics):
9855 if nic.mac == constants.VALUE_AUTO:
9856 nic_mac_ini = "nic%d_mac" % idx
9857 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9859 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9861 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9862 if self.op.ip_check:
9863 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9864 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9865 (self.check_ip, self.op.instance_name),
9866 errors.ECODE_NOTUNIQUE)
9868 #### mac address generation
9869 # By generating here the mac address both the allocator and the hooks get
9870 # the real final mac address rather than the 'auto' or 'generate' value.
9871 # There is a race condition between the generation and the instance object
9872 # creation, which means that we know the mac is valid now, but we're not
9873 # sure it will be when we actually add the instance. If things go bad
9874 # adding the instance will abort because of a duplicate mac, and the
9875 # creation job will fail.
9876 for nic in self.nics:
9877 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9878 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9882 if self.op.iallocator is not None:
9883 self._RunAllocator()
9885 # Release all unneeded node locks
9886 _ReleaseLocks(self, locking.LEVEL_NODE,
9887 keep=filter(None, [self.op.pnode, self.op.snode,
9889 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9890 keep=filter(None, [self.op.pnode, self.op.snode,
9893 #### node related checks
9895 # check primary node
9896 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9897 assert self.pnode is not None, \
9898 "Cannot retrieve locked node %s" % self.op.pnode
9900 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9901 pnode.name, errors.ECODE_STATE)
9903 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9904 pnode.name, errors.ECODE_STATE)
9905 if not pnode.vm_capable:
9906 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9907 " '%s'" % pnode.name, errors.ECODE_STATE)
9909 self.secondaries = []
9911 # mirror node verification
9912 if self.op.disk_template in constants.DTS_INT_MIRROR:
9913 if self.op.snode == pnode.name:
9914 raise errors.OpPrereqError("The secondary node cannot be the"
9915 " primary node", errors.ECODE_INVAL)
9916 _CheckNodeOnline(self, self.op.snode)
9917 _CheckNodeNotDrained(self, self.op.snode)
9918 _CheckNodeVmCapable(self, self.op.snode)
9919 self.secondaries.append(self.op.snode)
9921 snode = self.cfg.GetNodeInfo(self.op.snode)
9922 if pnode.group != snode.group:
9923 self.LogWarning("The primary and secondary nodes are in two"
9924 " different node groups; the disk parameters"
9925 " from the first disk's node group will be"
9928 nodenames = [pnode.name] + self.secondaries
9930 # Verify instance specs
9931 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9933 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9934 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9935 constants.ISPEC_DISK_COUNT: len(self.disks),
9936 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9937 constants.ISPEC_NIC_COUNT: len(self.nics),
9938 constants.ISPEC_SPINDLE_USE: spindle_use,
9941 group_info = self.cfg.GetNodeGroup(pnode.group)
9942 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9943 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9944 if not self.op.ignore_ipolicy and res:
9945 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9946 " policy: %s") % (pnode.group,
9947 utils.CommaJoin(res)),
9950 if not self.adopt_disks:
9951 if self.op.disk_template == constants.DT_RBD:
9952 # _CheckRADOSFreeSpace() is just a placeholder.
9953 # Any function that checks prerequisites can be placed here.
9954 # Check if there is enough space on the RADOS cluster.
9955 _CheckRADOSFreeSpace()
9957 # Check lv size requirements, if not adopting
9958 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9959 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9961 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9962 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9963 disk[constants.IDISK_ADOPT])
9964 for disk in self.disks])
9965 if len(all_lvs) != len(self.disks):
9966 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9968 for lv_name in all_lvs:
9970 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9971 # to ReserveLV uses the same syntax
9972 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9973 except errors.ReservationError:
9974 raise errors.OpPrereqError("LV named %s used by another instance" %
9975 lv_name, errors.ECODE_NOTUNIQUE)
9977 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9978 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9980 node_lvs = self.rpc.call_lv_list([pnode.name],
9981 vg_names.payload.keys())[pnode.name]
9982 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9983 node_lvs = node_lvs.payload
9985 delta = all_lvs.difference(node_lvs.keys())
9987 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9988 utils.CommaJoin(delta),
9990 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9992 raise errors.OpPrereqError("Online logical volumes found, cannot"
9993 " adopt: %s" % utils.CommaJoin(online_lvs),
9995 # update the size of disk based on what is found
9996 for dsk in self.disks:
9997 dsk[constants.IDISK_SIZE] = \
9998 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9999 dsk[constants.IDISK_ADOPT])][0]))
10001 elif self.op.disk_template == constants.DT_BLOCK:
10002 # Normalize and de-duplicate device paths
10003 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10004 for disk in self.disks])
10005 if len(all_disks) != len(self.disks):
10006 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10007 errors.ECODE_INVAL)
10008 baddisks = [d for d in all_disks
10009 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10011 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10012 " cannot be adopted" %
10013 (", ".join(baddisks),
10014 constants.ADOPTABLE_BLOCKDEV_ROOT),
10015 errors.ECODE_INVAL)
10017 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10018 list(all_disks))[pnode.name]
10019 node_disks.Raise("Cannot get block device information from node %s" %
10021 node_disks = node_disks.payload
10022 delta = all_disks.difference(node_disks.keys())
10024 raise errors.OpPrereqError("Missing block device(s): %s" %
10025 utils.CommaJoin(delta),
10026 errors.ECODE_INVAL)
10027 for dsk in self.disks:
10028 dsk[constants.IDISK_SIZE] = \
10029 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10031 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10033 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10034 # check OS parameters (remotely)
10035 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10037 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10039 # memory check on primary node
10040 #TODO(dynmem): use MINMEM for checking
10042 _CheckNodeFreeMemory(self, self.pnode.name,
10043 "creating instance %s" % self.op.instance_name,
10044 self.be_full[constants.BE_MAXMEM],
10045 self.op.hypervisor)
10047 self.dry_run_result = list(nodenames)
10049 def Exec(self, feedback_fn):
10050 """Create and add the instance to the cluster.
10053 instance = self.op.instance_name
10054 pnode_name = self.pnode.name
10056 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10057 self.owned_locks(locking.LEVEL_NODE)), \
10058 "Node locks differ from node resource locks"
10060 ht_kind = self.op.hypervisor
10061 if ht_kind in constants.HTS_REQ_PORT:
10062 network_port = self.cfg.AllocatePort()
10064 network_port = None
10066 # This is ugly but we got a chicken-egg problem here
10067 # We can only take the group disk parameters, as the instance
10068 # has no disks yet (we are generating them right here).
10069 node = self.cfg.GetNodeInfo(pnode_name)
10070 nodegroup = self.cfg.GetNodeGroup(node.group)
10071 disks = _GenerateDiskTemplate(self,
10072 self.op.disk_template,
10073 instance, pnode_name,
10076 self.instance_file_storage_dir,
10077 self.op.file_driver,
10080 self.cfg.GetGroupDiskParams(nodegroup))
10082 iobj = objects.Instance(name=instance, os=self.op.os_type,
10083 primary_node=pnode_name,
10084 nics=self.nics, disks=disks,
10085 disk_template=self.op.disk_template,
10086 admin_state=constants.ADMINST_DOWN,
10087 network_port=network_port,
10088 beparams=self.op.beparams,
10089 hvparams=self.op.hvparams,
10090 hypervisor=self.op.hypervisor,
10091 osparams=self.op.osparams,
10095 for tag in self.op.tags:
10098 if self.adopt_disks:
10099 if self.op.disk_template == constants.DT_PLAIN:
10100 # rename LVs to the newly-generated names; we need to construct
10101 # 'fake' LV disks with the old data, plus the new unique_id
10102 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10104 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10105 rename_to.append(t_dsk.logical_id)
10106 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10107 self.cfg.SetDiskID(t_dsk, pnode_name)
10108 result = self.rpc.call_blockdev_rename(pnode_name,
10109 zip(tmp_disks, rename_to))
10110 result.Raise("Failed to rename adoped LVs")
10112 feedback_fn("* creating instance disks...")
10114 _CreateDisks(self, iobj)
10115 except errors.OpExecError:
10116 self.LogWarning("Device creation failed, reverting...")
10118 _RemoveDisks(self, iobj)
10120 self.cfg.ReleaseDRBDMinors(instance)
10123 feedback_fn("adding instance %s to cluster config" % instance)
10125 self.cfg.AddInstance(iobj, self.proc.GetECId())
10127 # Declare that we don't want to remove the instance lock anymore, as we've
10128 # added the instance to the config
10129 del self.remove_locks[locking.LEVEL_INSTANCE]
10131 if self.op.mode == constants.INSTANCE_IMPORT:
10132 # Release unused nodes
10133 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10135 # Release all nodes
10136 _ReleaseLocks(self, locking.LEVEL_NODE)
10139 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10140 feedback_fn("* wiping instance disks...")
10142 _WipeDisks(self, iobj)
10143 except errors.OpExecError, err:
10144 logging.exception("Wiping disks failed")
10145 self.LogWarning("Wiping instance disks failed (%s)", err)
10149 # Something is already wrong with the disks, don't do anything else
10151 elif self.op.wait_for_sync:
10152 disk_abort = not _WaitForSync(self, iobj)
10153 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10154 # make sure the disks are not degraded (still sync-ing is ok)
10155 feedback_fn("* checking mirrors status")
10156 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10161 _RemoveDisks(self, iobj)
10162 self.cfg.RemoveInstance(iobj.name)
10163 # Make sure the instance lock gets removed
10164 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10165 raise errors.OpExecError("There are some degraded disks for"
10168 # Release all node resource locks
10169 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10171 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10172 # we need to set the disks ID to the primary node, since the
10173 # preceding code might or might have not done it, depending on
10174 # disk template and other options
10175 for disk in iobj.disks:
10176 self.cfg.SetDiskID(disk, pnode_name)
10177 if self.op.mode == constants.INSTANCE_CREATE:
10178 if not self.op.no_install:
10179 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10180 not self.op.wait_for_sync)
10182 feedback_fn("* pausing disk sync to install instance OS")
10183 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10186 for idx, success in enumerate(result.payload):
10188 logging.warn("pause-sync of instance %s for disk %d failed",
10191 feedback_fn("* running the instance OS create scripts...")
10192 # FIXME: pass debug option from opcode to backend
10194 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10195 self.op.debug_level)
10197 feedback_fn("* resuming disk sync")
10198 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10201 for idx, success in enumerate(result.payload):
10203 logging.warn("resume-sync of instance %s for disk %d failed",
10206 os_add_result.Raise("Could not add os for instance %s"
10207 " on node %s" % (instance, pnode_name))
10210 if self.op.mode == constants.INSTANCE_IMPORT:
10211 feedback_fn("* running the instance OS import scripts...")
10215 for idx, image in enumerate(self.src_images):
10219 # FIXME: pass debug option from opcode to backend
10220 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10221 constants.IEIO_FILE, (image, ),
10222 constants.IEIO_SCRIPT,
10223 (iobj.disks[idx], idx),
10225 transfers.append(dt)
10228 masterd.instance.TransferInstanceData(self, feedback_fn,
10229 self.op.src_node, pnode_name,
10230 self.pnode.secondary_ip,
10232 if not compat.all(import_result):
10233 self.LogWarning("Some disks for instance %s on node %s were not"
10234 " imported successfully" % (instance, pnode_name))
10236 rename_from = self._old_instance_name
10238 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10239 feedback_fn("* preparing remote import...")
10240 # The source cluster will stop the instance before attempting to make
10241 # a connection. In some cases stopping an instance can take a long
10242 # time, hence the shutdown timeout is added to the connection
10244 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10245 self.op.source_shutdown_timeout)
10246 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10248 assert iobj.primary_node == self.pnode.name
10250 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10251 self.source_x509_ca,
10252 self._cds, timeouts)
10253 if not compat.all(disk_results):
10254 # TODO: Should the instance still be started, even if some disks
10255 # failed to import (valid for local imports, too)?
10256 self.LogWarning("Some disks for instance %s on node %s were not"
10257 " imported successfully" % (instance, pnode_name))
10259 rename_from = self.source_instance_name
10262 # also checked in the prereq part
10263 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10266 # Run rename script on newly imported instance
10267 assert iobj.name == instance
10268 feedback_fn("Running rename script for %s" % instance)
10269 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10271 self.op.debug_level)
10272 if result.fail_msg:
10273 self.LogWarning("Failed to run rename script for %s on node"
10274 " %s: %s" % (instance, pnode_name, result.fail_msg))
10276 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10279 iobj.admin_state = constants.ADMINST_UP
10280 self.cfg.Update(iobj, feedback_fn)
10281 logging.info("Starting instance %s on node %s", instance, pnode_name)
10282 feedback_fn("* starting instance...")
10283 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10285 result.Raise("Could not start instance")
10287 return list(iobj.all_nodes)
10290 def _CheckRADOSFreeSpace():
10291 """Compute disk size requirements inside the RADOS cluster.
10294 # For the RADOS cluster we assume there is always enough space.
10298 class LUInstanceConsole(NoHooksLU):
10299 """Connect to an instance's console.
10301 This is somewhat special in that it returns the command line that
10302 you need to run on the master node in order to connect to the
10308 def ExpandNames(self):
10309 self.share_locks = _ShareAll()
10310 self._ExpandAndLockInstance()
10312 def CheckPrereq(self):
10313 """Check prerequisites.
10315 This checks that the instance is in the cluster.
10318 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10319 assert self.instance is not None, \
10320 "Cannot retrieve locked instance %s" % self.op.instance_name
10321 _CheckNodeOnline(self, self.instance.primary_node)
10323 def Exec(self, feedback_fn):
10324 """Connect to the console of an instance
10327 instance = self.instance
10328 node = instance.primary_node
10330 node_insts = self.rpc.call_instance_list([node],
10331 [instance.hypervisor])[node]
10332 node_insts.Raise("Can't get node information from %s" % node)
10334 if instance.name not in node_insts.payload:
10335 if instance.admin_state == constants.ADMINST_UP:
10336 state = constants.INSTST_ERRORDOWN
10337 elif instance.admin_state == constants.ADMINST_DOWN:
10338 state = constants.INSTST_ADMINDOWN
10340 state = constants.INSTST_ADMINOFFLINE
10341 raise errors.OpExecError("Instance %s is not running (state %s)" %
10342 (instance.name, state))
10344 logging.debug("Connecting to console of %s on %s", instance.name, node)
10346 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10349 def _GetInstanceConsole(cluster, instance):
10350 """Returns console information for an instance.
10352 @type cluster: L{objects.Cluster}
10353 @type instance: L{objects.Instance}
10357 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10358 # beparams and hvparams are passed separately, to avoid editing the
10359 # instance and then saving the defaults in the instance itself.
10360 hvparams = cluster.FillHV(instance)
10361 beparams = cluster.FillBE(instance)
10362 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10364 assert console.instance == instance.name
10365 assert console.Validate()
10367 return console.ToDict()
10370 class LUInstanceReplaceDisks(LogicalUnit):
10371 """Replace the disks of an instance.
10374 HPATH = "mirrors-replace"
10375 HTYPE = constants.HTYPE_INSTANCE
10378 def CheckArguments(self):
10379 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10380 self.op.iallocator)
10382 def ExpandNames(self):
10383 self._ExpandAndLockInstance()
10385 assert locking.LEVEL_NODE not in self.needed_locks
10386 assert locking.LEVEL_NODE_RES not in self.needed_locks
10387 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10389 assert self.op.iallocator is None or self.op.remote_node is None, \
10390 "Conflicting options"
10392 if self.op.remote_node is not None:
10393 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10395 # Warning: do not remove the locking of the new secondary here
10396 # unless DRBD8.AddChildren is changed to work in parallel;
10397 # currently it doesn't since parallel invocations of
10398 # FindUnusedMinor will conflict
10399 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10400 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10402 self.needed_locks[locking.LEVEL_NODE] = []
10403 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10405 if self.op.iallocator is not None:
10406 # iallocator will select a new node in the same group
10407 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10409 self.needed_locks[locking.LEVEL_NODE_RES] = []
10411 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10412 self.op.iallocator, self.op.remote_node,
10413 self.op.disks, False, self.op.early_release,
10414 self.op.ignore_ipolicy)
10416 self.tasklets = [self.replacer]
10418 def DeclareLocks(self, level):
10419 if level == locking.LEVEL_NODEGROUP:
10420 assert self.op.remote_node is None
10421 assert self.op.iallocator is not None
10422 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10424 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10425 # Lock all groups used by instance optimistically; this requires going
10426 # via the node before it's locked, requiring verification later on
10427 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10428 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10430 elif level == locking.LEVEL_NODE:
10431 if self.op.iallocator is not None:
10432 assert self.op.remote_node is None
10433 assert not self.needed_locks[locking.LEVEL_NODE]
10435 # Lock member nodes of all locked groups
10436 self.needed_locks[locking.LEVEL_NODE] = [node_name
10437 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10438 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10440 self._LockInstancesNodes()
10441 elif level == locking.LEVEL_NODE_RES:
10443 self.needed_locks[locking.LEVEL_NODE_RES] = \
10444 self.needed_locks[locking.LEVEL_NODE]
10446 def BuildHooksEnv(self):
10447 """Build hooks env.
10449 This runs on the master, the primary and all the secondaries.
10452 instance = self.replacer.instance
10454 "MODE": self.op.mode,
10455 "NEW_SECONDARY": self.op.remote_node,
10456 "OLD_SECONDARY": instance.secondary_nodes[0],
10458 env.update(_BuildInstanceHookEnvByObject(self, instance))
10461 def BuildHooksNodes(self):
10462 """Build hooks nodes.
10465 instance = self.replacer.instance
10467 self.cfg.GetMasterNode(),
10468 instance.primary_node,
10470 if self.op.remote_node is not None:
10471 nl.append(self.op.remote_node)
10474 def CheckPrereq(self):
10475 """Check prerequisites.
10478 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10479 self.op.iallocator is None)
10481 # Verify if node group locks are still correct
10482 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10484 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10486 return LogicalUnit.CheckPrereq(self)
10489 class TLReplaceDisks(Tasklet):
10490 """Replaces disks for an instance.
10492 Note: Locking is not within the scope of this class.
10495 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10496 disks, delay_iallocator, early_release, ignore_ipolicy):
10497 """Initializes this class.
10500 Tasklet.__init__(self, lu)
10503 self.instance_name = instance_name
10505 self.iallocator_name = iallocator_name
10506 self.remote_node = remote_node
10508 self.delay_iallocator = delay_iallocator
10509 self.early_release = early_release
10510 self.ignore_ipolicy = ignore_ipolicy
10513 self.instance = None
10514 self.new_node = None
10515 self.target_node = None
10516 self.other_node = None
10517 self.remote_node_info = None
10518 self.node_secondary_ip = None
10521 def CheckArguments(mode, remote_node, iallocator):
10522 """Helper function for users of this class.
10525 # check for valid parameter combination
10526 if mode == constants.REPLACE_DISK_CHG:
10527 if remote_node is None and iallocator is None:
10528 raise errors.OpPrereqError("When changing the secondary either an"
10529 " iallocator script must be used or the"
10530 " new node given", errors.ECODE_INVAL)
10532 if remote_node is not None and iallocator is not None:
10533 raise errors.OpPrereqError("Give either the iallocator or the new"
10534 " secondary, not both", errors.ECODE_INVAL)
10536 elif remote_node is not None or iallocator is not None:
10537 # Not replacing the secondary
10538 raise errors.OpPrereqError("The iallocator and new node options can"
10539 " only be used when changing the"
10540 " secondary node", errors.ECODE_INVAL)
10543 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10544 """Compute a new secondary node using an IAllocator.
10547 ial = IAllocator(lu.cfg, lu.rpc,
10548 mode=constants.IALLOCATOR_MODE_RELOC,
10549 name=instance_name,
10550 relocate_from=list(relocate_from))
10552 ial.Run(iallocator_name)
10554 if not ial.success:
10555 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10556 " %s" % (iallocator_name, ial.info),
10557 errors.ECODE_NORES)
10559 if len(ial.result) != ial.required_nodes:
10560 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10561 " of nodes (%s), required %s" %
10563 len(ial.result), ial.required_nodes),
10564 errors.ECODE_FAULT)
10566 remote_node_name = ial.result[0]
10568 lu.LogInfo("Selected new secondary for instance '%s': %s",
10569 instance_name, remote_node_name)
10571 return remote_node_name
10573 def _FindFaultyDisks(self, node_name):
10574 """Wrapper for L{_FindFaultyInstanceDisks}.
10577 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10580 def _CheckDisksActivated(self, instance):
10581 """Checks if the instance disks are activated.
10583 @param instance: The instance to check disks
10584 @return: True if they are activated, False otherwise
10587 nodes = instance.all_nodes
10589 for idx, dev in enumerate(instance.disks):
10591 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10592 self.cfg.SetDiskID(dev, node)
10594 result = _BlockdevFind(self, node, dev, instance)
10598 elif result.fail_msg or not result.payload:
10603 def CheckPrereq(self):
10604 """Check prerequisites.
10606 This checks that the instance is in the cluster.
10609 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10610 assert instance is not None, \
10611 "Cannot retrieve locked instance %s" % self.instance_name
10613 if instance.disk_template != constants.DT_DRBD8:
10614 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10615 " instances", errors.ECODE_INVAL)
10617 if len(instance.secondary_nodes) != 1:
10618 raise errors.OpPrereqError("The instance has a strange layout,"
10619 " expected one secondary but found %d" %
10620 len(instance.secondary_nodes),
10621 errors.ECODE_FAULT)
10623 if not self.delay_iallocator:
10624 self._CheckPrereq2()
10626 def _CheckPrereq2(self):
10627 """Check prerequisites, second part.
10629 This function should always be part of CheckPrereq. It was separated and is
10630 now called from Exec because during node evacuation iallocator was only
10631 called with an unmodified cluster model, not taking planned changes into
10635 instance = self.instance
10636 secondary_node = instance.secondary_nodes[0]
10638 if self.iallocator_name is None:
10639 remote_node = self.remote_node
10641 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10642 instance.name, instance.secondary_nodes)
10644 if remote_node is None:
10645 self.remote_node_info = None
10647 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10648 "Remote node '%s' is not locked" % remote_node
10650 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10651 assert self.remote_node_info is not None, \
10652 "Cannot retrieve locked node %s" % remote_node
10654 if remote_node == self.instance.primary_node:
10655 raise errors.OpPrereqError("The specified node is the primary node of"
10656 " the instance", errors.ECODE_INVAL)
10658 if remote_node == secondary_node:
10659 raise errors.OpPrereqError("The specified node is already the"
10660 " secondary node of the instance",
10661 errors.ECODE_INVAL)
10663 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10664 constants.REPLACE_DISK_CHG):
10665 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10666 errors.ECODE_INVAL)
10668 if self.mode == constants.REPLACE_DISK_AUTO:
10669 if not self._CheckDisksActivated(instance):
10670 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10671 " first" % self.instance_name,
10672 errors.ECODE_STATE)
10673 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10674 faulty_secondary = self._FindFaultyDisks(secondary_node)
10676 if faulty_primary and faulty_secondary:
10677 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10678 " one node and can not be repaired"
10679 " automatically" % self.instance_name,
10680 errors.ECODE_STATE)
10683 self.disks = faulty_primary
10684 self.target_node = instance.primary_node
10685 self.other_node = secondary_node
10686 check_nodes = [self.target_node, self.other_node]
10687 elif faulty_secondary:
10688 self.disks = faulty_secondary
10689 self.target_node = secondary_node
10690 self.other_node = instance.primary_node
10691 check_nodes = [self.target_node, self.other_node]
10697 # Non-automatic modes
10698 if self.mode == constants.REPLACE_DISK_PRI:
10699 self.target_node = instance.primary_node
10700 self.other_node = secondary_node
10701 check_nodes = [self.target_node, self.other_node]
10703 elif self.mode == constants.REPLACE_DISK_SEC:
10704 self.target_node = secondary_node
10705 self.other_node = instance.primary_node
10706 check_nodes = [self.target_node, self.other_node]
10708 elif self.mode == constants.REPLACE_DISK_CHG:
10709 self.new_node = remote_node
10710 self.other_node = instance.primary_node
10711 self.target_node = secondary_node
10712 check_nodes = [self.new_node, self.other_node]
10714 _CheckNodeNotDrained(self.lu, remote_node)
10715 _CheckNodeVmCapable(self.lu, remote_node)
10717 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10718 assert old_node_info is not None
10719 if old_node_info.offline and not self.early_release:
10720 # doesn't make sense to delay the release
10721 self.early_release = True
10722 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10723 " early-release mode", secondary_node)
10726 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10729 # If not specified all disks should be replaced
10731 self.disks = range(len(self.instance.disks))
10733 # TODO: This is ugly, but right now we can't distinguish between internal
10734 # submitted opcode and external one. We should fix that.
10735 if self.remote_node_info:
10736 # We change the node, lets verify it still meets instance policy
10737 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10738 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10740 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10741 ignore=self.ignore_ipolicy)
10743 for node in check_nodes:
10744 _CheckNodeOnline(self.lu, node)
10746 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10749 if node_name is not None)
10751 # Release unneeded node and node resource locks
10752 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10753 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10755 # Release any owned node group
10756 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10757 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10759 # Check whether disks are valid
10760 for disk_idx in self.disks:
10761 instance.FindDisk(disk_idx)
10763 # Get secondary node IP addresses
10764 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10765 in self.cfg.GetMultiNodeInfo(touched_nodes))
10767 def Exec(self, feedback_fn):
10768 """Execute disk replacement.
10770 This dispatches the disk replacement to the appropriate handler.
10773 if self.delay_iallocator:
10774 self._CheckPrereq2()
10777 # Verify owned locks before starting operation
10778 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10779 assert set(owned_nodes) == set(self.node_secondary_ip), \
10780 ("Incorrect node locks, owning %s, expected %s" %
10781 (owned_nodes, self.node_secondary_ip.keys()))
10782 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10783 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10785 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10786 assert list(owned_instances) == [self.instance_name], \
10787 "Instance '%s' not locked" % self.instance_name
10789 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10790 "Should not own any node group lock at this point"
10793 feedback_fn("No disks need replacement")
10796 feedback_fn("Replacing disk(s) %s for %s" %
10797 (utils.CommaJoin(self.disks), self.instance.name))
10799 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10801 # Activate the instance disks if we're replacing them on a down instance
10803 _StartInstanceDisks(self.lu, self.instance, True)
10806 # Should we replace the secondary node?
10807 if self.new_node is not None:
10808 fn = self._ExecDrbd8Secondary
10810 fn = self._ExecDrbd8DiskOnly
10812 result = fn(feedback_fn)
10814 # Deactivate the instance disks if we're replacing them on a
10817 _SafeShutdownInstanceDisks(self.lu, self.instance)
10819 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10822 # Verify owned locks
10823 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10824 nodes = frozenset(self.node_secondary_ip)
10825 assert ((self.early_release and not owned_nodes) or
10826 (not self.early_release and not (set(owned_nodes) - nodes))), \
10827 ("Not owning the correct locks, early_release=%s, owned=%r,"
10828 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10832 def _CheckVolumeGroup(self, nodes):
10833 self.lu.LogInfo("Checking volume groups")
10835 vgname = self.cfg.GetVGName()
10837 # Make sure volume group exists on all involved nodes
10838 results = self.rpc.call_vg_list(nodes)
10840 raise errors.OpExecError("Can't list volume groups on the nodes")
10843 res = results[node]
10844 res.Raise("Error checking node %s" % node)
10845 if vgname not in res.payload:
10846 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10849 def _CheckDisksExistence(self, nodes):
10850 # Check disk existence
10851 for idx, dev in enumerate(self.instance.disks):
10852 if idx not in self.disks:
10856 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10857 self.cfg.SetDiskID(dev, node)
10859 result = _BlockdevFind(self, node, dev, self.instance)
10861 msg = result.fail_msg
10862 if msg or not result.payload:
10864 msg = "disk not found"
10865 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10868 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10869 for idx, dev in enumerate(self.instance.disks):
10870 if idx not in self.disks:
10873 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10876 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10877 on_primary, ldisk=ldisk):
10878 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10879 " replace disks for instance %s" %
10880 (node_name, self.instance.name))
10882 def _CreateNewStorage(self, node_name):
10883 """Create new storage on the primary or secondary node.
10885 This is only used for same-node replaces, not for changing the
10886 secondary node, hence we don't want to modify the existing disk.
10891 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10892 for idx, dev in enumerate(disks):
10893 if idx not in self.disks:
10896 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10898 self.cfg.SetDiskID(dev, node_name)
10900 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10901 names = _GenerateUniqueNames(self.lu, lv_names)
10903 (data_disk, meta_disk) = dev.children
10904 vg_data = data_disk.logical_id[0]
10905 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10906 logical_id=(vg_data, names[0]),
10907 params=data_disk.params)
10908 vg_meta = meta_disk.logical_id[0]
10909 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10910 logical_id=(vg_meta, names[1]),
10911 params=meta_disk.params)
10913 new_lvs = [lv_data, lv_meta]
10914 old_lvs = [child.Copy() for child in dev.children]
10915 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10917 # we pass force_create=True to force the LVM creation
10918 for new_lv in new_lvs:
10919 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10920 _GetInstanceInfoText(self.instance), False)
10924 def _CheckDevices(self, node_name, iv_names):
10925 for name, (dev, _, _) in iv_names.iteritems():
10926 self.cfg.SetDiskID(dev, node_name)
10928 result = _BlockdevFind(self, node_name, dev, self.instance)
10930 msg = result.fail_msg
10931 if msg or not result.payload:
10933 msg = "disk not found"
10934 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10937 if result.payload.is_degraded:
10938 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10940 def _RemoveOldStorage(self, node_name, iv_names):
10941 for name, (_, old_lvs, _) in iv_names.iteritems():
10942 self.lu.LogInfo("Remove logical volumes for %s" % name)
10945 self.cfg.SetDiskID(lv, node_name)
10947 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10949 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10950 hint="remove unused LVs manually")
10952 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10953 """Replace a disk on the primary or secondary for DRBD 8.
10955 The algorithm for replace is quite complicated:
10957 1. for each disk to be replaced:
10959 1. create new LVs on the target node with unique names
10960 1. detach old LVs from the drbd device
10961 1. rename old LVs to name_replaced.<time_t>
10962 1. rename new LVs to old LVs
10963 1. attach the new LVs (with the old names now) to the drbd device
10965 1. wait for sync across all devices
10967 1. for each modified disk:
10969 1. remove old LVs (which have the name name_replaces.<time_t>)
10971 Failures are not very well handled.
10976 # Step: check device activation
10977 self.lu.LogStep(1, steps_total, "Check device existence")
10978 self._CheckDisksExistence([self.other_node, self.target_node])
10979 self._CheckVolumeGroup([self.target_node, self.other_node])
10981 # Step: check other node consistency
10982 self.lu.LogStep(2, steps_total, "Check peer consistency")
10983 self._CheckDisksConsistency(self.other_node,
10984 self.other_node == self.instance.primary_node,
10987 # Step: create new storage
10988 self.lu.LogStep(3, steps_total, "Allocate new storage")
10989 iv_names = self._CreateNewStorage(self.target_node)
10991 # Step: for each lv, detach+rename*2+attach
10992 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10993 for dev, old_lvs, new_lvs in iv_names.itervalues():
10994 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10996 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10998 result.Raise("Can't detach drbd from local storage on node"
10999 " %s for device %s" % (self.target_node, dev.iv_name))
11001 #cfg.Update(instance)
11003 # ok, we created the new LVs, so now we know we have the needed
11004 # storage; as such, we proceed on the target node to rename
11005 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11006 # using the assumption that logical_id == physical_id (which in
11007 # turn is the unique_id on that node)
11009 # FIXME(iustin): use a better name for the replaced LVs
11010 temp_suffix = int(time.time())
11011 ren_fn = lambda d, suff: (d.physical_id[0],
11012 d.physical_id[1] + "_replaced-%s" % suff)
11014 # Build the rename list based on what LVs exist on the node
11015 rename_old_to_new = []
11016 for to_ren in old_lvs:
11017 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11018 if not result.fail_msg and result.payload:
11020 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11022 self.lu.LogInfo("Renaming the old LVs on the target node")
11023 result = self.rpc.call_blockdev_rename(self.target_node,
11025 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11027 # Now we rename the new LVs to the old LVs
11028 self.lu.LogInfo("Renaming the new LVs on the target node")
11029 rename_new_to_old = [(new, old.physical_id)
11030 for old, new in zip(old_lvs, new_lvs)]
11031 result = self.rpc.call_blockdev_rename(self.target_node,
11033 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11035 # Intermediate steps of in memory modifications
11036 for old, new in zip(old_lvs, new_lvs):
11037 new.logical_id = old.logical_id
11038 self.cfg.SetDiskID(new, self.target_node)
11040 # We need to modify old_lvs so that removal later removes the
11041 # right LVs, not the newly added ones; note that old_lvs is a
11043 for disk in old_lvs:
11044 disk.logical_id = ren_fn(disk, temp_suffix)
11045 self.cfg.SetDiskID(disk, self.target_node)
11047 # Now that the new lvs have the old name, we can add them to the device
11048 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11049 result = self.rpc.call_blockdev_addchildren(self.target_node,
11050 (dev, self.instance), new_lvs)
11051 msg = result.fail_msg
11053 for new_lv in new_lvs:
11054 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11057 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11058 hint=("cleanup manually the unused logical"
11060 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11062 cstep = itertools.count(5)
11064 if self.early_release:
11065 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11066 self._RemoveOldStorage(self.target_node, iv_names)
11067 # TODO: Check if releasing locks early still makes sense
11068 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11070 # Release all resource locks except those used by the instance
11071 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11072 keep=self.node_secondary_ip.keys())
11074 # Release all node locks while waiting for sync
11075 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11077 # TODO: Can the instance lock be downgraded here? Take the optional disk
11078 # shutdown in the caller into consideration.
11081 # This can fail as the old devices are degraded and _WaitForSync
11082 # does a combined result over all disks, so we don't check its return value
11083 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11084 _WaitForSync(self.lu, self.instance)
11086 # Check all devices manually
11087 self._CheckDevices(self.instance.primary_node, iv_names)
11089 # Step: remove old storage
11090 if not self.early_release:
11091 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11092 self._RemoveOldStorage(self.target_node, iv_names)
11094 def _ExecDrbd8Secondary(self, feedback_fn):
11095 """Replace the secondary node for DRBD 8.
11097 The algorithm for replace is quite complicated:
11098 - for all disks of the instance:
11099 - create new LVs on the new node with same names
11100 - shutdown the drbd device on the old secondary
11101 - disconnect the drbd network on the primary
11102 - create the drbd device on the new secondary
11103 - network attach the drbd on the primary, using an artifice:
11104 the drbd code for Attach() will connect to the network if it
11105 finds a device which is connected to the good local disks but
11106 not network enabled
11107 - wait for sync across all devices
11108 - remove all disks from the old secondary
11110 Failures are not very well handled.
11115 pnode = self.instance.primary_node
11117 # Step: check device activation
11118 self.lu.LogStep(1, steps_total, "Check device existence")
11119 self._CheckDisksExistence([self.instance.primary_node])
11120 self._CheckVolumeGroup([self.instance.primary_node])
11122 # Step: check other node consistency
11123 self.lu.LogStep(2, steps_total, "Check peer consistency")
11124 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11126 # Step: create new storage
11127 self.lu.LogStep(3, steps_total, "Allocate new storage")
11128 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11129 for idx, dev in enumerate(disks):
11130 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11131 (self.new_node, idx))
11132 # we pass force_create=True to force LVM creation
11133 for new_lv in dev.children:
11134 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11135 True, _GetInstanceInfoText(self.instance), False)
11137 # Step 4: dbrd minors and drbd setups changes
11138 # after this, we must manually remove the drbd minors on both the
11139 # error and the success paths
11140 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11141 minors = self.cfg.AllocateDRBDMinor([self.new_node
11142 for dev in self.instance.disks],
11143 self.instance.name)
11144 logging.debug("Allocated minors %r", minors)
11147 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11148 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11149 (self.new_node, idx))
11150 # create new devices on new_node; note that we create two IDs:
11151 # one without port, so the drbd will be activated without
11152 # networking information on the new node at this stage, and one
11153 # with network, for the latter activation in step 4
11154 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11155 if self.instance.primary_node == o_node1:
11158 assert self.instance.primary_node == o_node2, "Three-node instance?"
11161 new_alone_id = (self.instance.primary_node, self.new_node, None,
11162 p_minor, new_minor, o_secret)
11163 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11164 p_minor, new_minor, o_secret)
11166 iv_names[idx] = (dev, dev.children, new_net_id)
11167 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11169 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11170 logical_id=new_alone_id,
11171 children=dev.children,
11174 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11177 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11179 _GetInstanceInfoText(self.instance), False)
11180 except errors.GenericError:
11181 self.cfg.ReleaseDRBDMinors(self.instance.name)
11184 # We have new devices, shutdown the drbd on the old secondary
11185 for idx, dev in enumerate(self.instance.disks):
11186 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11187 self.cfg.SetDiskID(dev, self.target_node)
11188 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11189 (dev, self.instance)).fail_msg
11191 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11192 "node: %s" % (idx, msg),
11193 hint=("Please cleanup this device manually as"
11194 " soon as possible"))
11196 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11197 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11198 self.instance.disks)[pnode]
11200 msg = result.fail_msg
11202 # detaches didn't succeed (unlikely)
11203 self.cfg.ReleaseDRBDMinors(self.instance.name)
11204 raise errors.OpExecError("Can't detach the disks from the network on"
11205 " old node: %s" % (msg,))
11207 # if we managed to detach at least one, we update all the disks of
11208 # the instance to point to the new secondary
11209 self.lu.LogInfo("Updating instance configuration")
11210 for dev, _, new_logical_id in iv_names.itervalues():
11211 dev.logical_id = new_logical_id
11212 self.cfg.SetDiskID(dev, self.instance.primary_node)
11214 self.cfg.Update(self.instance, feedback_fn)
11216 # Release all node locks (the configuration has been updated)
11217 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11219 # and now perform the drbd attach
11220 self.lu.LogInfo("Attaching primary drbds to new secondary"
11221 " (standalone => connected)")
11222 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11224 self.node_secondary_ip,
11225 (self.instance.disks, self.instance),
11226 self.instance.name,
11228 for to_node, to_result in result.items():
11229 msg = to_result.fail_msg
11231 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11233 hint=("please do a gnt-instance info to see the"
11234 " status of disks"))
11236 cstep = itertools.count(5)
11238 if self.early_release:
11239 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11240 self._RemoveOldStorage(self.target_node, iv_names)
11241 # TODO: Check if releasing locks early still makes sense
11242 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11244 # Release all resource locks except those used by the instance
11245 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11246 keep=self.node_secondary_ip.keys())
11248 # TODO: Can the instance lock be downgraded here? Take the optional disk
11249 # shutdown in the caller into consideration.
11252 # This can fail as the old devices are degraded and _WaitForSync
11253 # does a combined result over all disks, so we don't check its return value
11254 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11255 _WaitForSync(self.lu, self.instance)
11257 # Check all devices manually
11258 self._CheckDevices(self.instance.primary_node, iv_names)
11260 # Step: remove old storage
11261 if not self.early_release:
11262 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11263 self._RemoveOldStorage(self.target_node, iv_names)
11266 class LURepairNodeStorage(NoHooksLU):
11267 """Repairs the volume group on a node.
11272 def CheckArguments(self):
11273 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11275 storage_type = self.op.storage_type
11277 if (constants.SO_FIX_CONSISTENCY not in
11278 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11279 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11280 " repaired" % storage_type,
11281 errors.ECODE_INVAL)
11283 def ExpandNames(self):
11284 self.needed_locks = {
11285 locking.LEVEL_NODE: [self.op.node_name],
11288 def _CheckFaultyDisks(self, instance, node_name):
11289 """Ensure faulty disks abort the opcode or at least warn."""
11291 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11293 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11294 " node '%s'" % (instance.name, node_name),
11295 errors.ECODE_STATE)
11296 except errors.OpPrereqError, err:
11297 if self.op.ignore_consistency:
11298 self.proc.LogWarning(str(err.args[0]))
11302 def CheckPrereq(self):
11303 """Check prerequisites.
11306 # Check whether any instance on this node has faulty disks
11307 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11308 if inst.admin_state != constants.ADMINST_UP:
11310 check_nodes = set(inst.all_nodes)
11311 check_nodes.discard(self.op.node_name)
11312 for inst_node_name in check_nodes:
11313 self._CheckFaultyDisks(inst, inst_node_name)
11315 def Exec(self, feedback_fn):
11316 feedback_fn("Repairing storage unit '%s' on %s ..." %
11317 (self.op.name, self.op.node_name))
11319 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11320 result = self.rpc.call_storage_execute(self.op.node_name,
11321 self.op.storage_type, st_args,
11323 constants.SO_FIX_CONSISTENCY)
11324 result.Raise("Failed to repair storage unit '%s' on %s" %
11325 (self.op.name, self.op.node_name))
11328 class LUNodeEvacuate(NoHooksLU):
11329 """Evacuates instances off a list of nodes.
11334 _MODE2IALLOCATOR = {
11335 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11336 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11337 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11339 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11340 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11341 constants.IALLOCATOR_NEVAC_MODES)
11343 def CheckArguments(self):
11344 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11346 def ExpandNames(self):
11347 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11349 if self.op.remote_node is not None:
11350 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11351 assert self.op.remote_node
11353 if self.op.remote_node == self.op.node_name:
11354 raise errors.OpPrereqError("Can not use evacuated node as a new"
11355 " secondary node", errors.ECODE_INVAL)
11357 if self.op.mode != constants.NODE_EVAC_SEC:
11358 raise errors.OpPrereqError("Without the use of an iallocator only"
11359 " secondary instances can be evacuated",
11360 errors.ECODE_INVAL)
11363 self.share_locks = _ShareAll()
11364 self.needed_locks = {
11365 locking.LEVEL_INSTANCE: [],
11366 locking.LEVEL_NODEGROUP: [],
11367 locking.LEVEL_NODE: [],
11370 # Determine nodes (via group) optimistically, needs verification once locks
11371 # have been acquired
11372 self.lock_nodes = self._DetermineNodes()
11374 def _DetermineNodes(self):
11375 """Gets the list of nodes to operate on.
11378 if self.op.remote_node is None:
11379 # Iallocator will choose any node(s) in the same group
11380 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11382 group_nodes = frozenset([self.op.remote_node])
11384 # Determine nodes to be locked
11385 return set([self.op.node_name]) | group_nodes
11387 def _DetermineInstances(self):
11388 """Builds list of instances to operate on.
11391 assert self.op.mode in constants.NODE_EVAC_MODES
11393 if self.op.mode == constants.NODE_EVAC_PRI:
11394 # Primary instances only
11395 inst_fn = _GetNodePrimaryInstances
11396 assert self.op.remote_node is None, \
11397 "Evacuating primary instances requires iallocator"
11398 elif self.op.mode == constants.NODE_EVAC_SEC:
11399 # Secondary instances only
11400 inst_fn = _GetNodeSecondaryInstances
11403 assert self.op.mode == constants.NODE_EVAC_ALL
11404 inst_fn = _GetNodeInstances
11405 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11407 raise errors.OpPrereqError("Due to an issue with the iallocator"
11408 " interface it is not possible to evacuate"
11409 " all instances at once; specify explicitly"
11410 " whether to evacuate primary or secondary"
11412 errors.ECODE_INVAL)
11414 return inst_fn(self.cfg, self.op.node_name)
11416 def DeclareLocks(self, level):
11417 if level == locking.LEVEL_INSTANCE:
11418 # Lock instances optimistically, needs verification once node and group
11419 # locks have been acquired
11420 self.needed_locks[locking.LEVEL_INSTANCE] = \
11421 set(i.name for i in self._DetermineInstances())
11423 elif level == locking.LEVEL_NODEGROUP:
11424 # Lock node groups for all potential target nodes optimistically, needs
11425 # verification once nodes have been acquired
11426 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11427 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11429 elif level == locking.LEVEL_NODE:
11430 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11432 def CheckPrereq(self):
11434 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11435 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11436 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11438 need_nodes = self._DetermineNodes()
11440 if not owned_nodes.issuperset(need_nodes):
11441 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11442 " locks were acquired, current nodes are"
11443 " are '%s', used to be '%s'; retry the"
11445 (self.op.node_name,
11446 utils.CommaJoin(need_nodes),
11447 utils.CommaJoin(owned_nodes)),
11448 errors.ECODE_STATE)
11450 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11451 if owned_groups != wanted_groups:
11452 raise errors.OpExecError("Node groups changed since locks were acquired,"
11453 " current groups are '%s', used to be '%s';"
11454 " retry the operation" %
11455 (utils.CommaJoin(wanted_groups),
11456 utils.CommaJoin(owned_groups)))
11458 # Determine affected instances
11459 self.instances = self._DetermineInstances()
11460 self.instance_names = [i.name for i in self.instances]
11462 if set(self.instance_names) != owned_instances:
11463 raise errors.OpExecError("Instances on node '%s' changed since locks"
11464 " were acquired, current instances are '%s',"
11465 " used to be '%s'; retry the operation" %
11466 (self.op.node_name,
11467 utils.CommaJoin(self.instance_names),
11468 utils.CommaJoin(owned_instances)))
11470 if self.instance_names:
11471 self.LogInfo("Evacuating instances from node '%s': %s",
11473 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11475 self.LogInfo("No instances to evacuate from node '%s'",
11478 if self.op.remote_node is not None:
11479 for i in self.instances:
11480 if i.primary_node == self.op.remote_node:
11481 raise errors.OpPrereqError("Node %s is the primary node of"
11482 " instance %s, cannot use it as"
11484 (self.op.remote_node, i.name),
11485 errors.ECODE_INVAL)
11487 def Exec(self, feedback_fn):
11488 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11490 if not self.instance_names:
11491 # No instances to evacuate
11494 elif self.op.iallocator is not None:
11495 # TODO: Implement relocation to other group
11496 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11497 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11498 instances=list(self.instance_names))
11500 ial.Run(self.op.iallocator)
11502 if not ial.success:
11503 raise errors.OpPrereqError("Can't compute node evacuation using"
11504 " iallocator '%s': %s" %
11505 (self.op.iallocator, ial.info),
11506 errors.ECODE_NORES)
11508 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11510 elif self.op.remote_node is not None:
11511 assert self.op.mode == constants.NODE_EVAC_SEC
11513 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11514 remote_node=self.op.remote_node,
11516 mode=constants.REPLACE_DISK_CHG,
11517 early_release=self.op.early_release)]
11518 for instance_name in self.instance_names
11522 raise errors.ProgrammerError("No iallocator or remote node")
11524 return ResultWithJobs(jobs)
11527 def _SetOpEarlyRelease(early_release, op):
11528 """Sets C{early_release} flag on opcodes if available.
11532 op.early_release = early_release
11533 except AttributeError:
11534 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11539 def _NodeEvacDest(use_nodes, group, nodes):
11540 """Returns group or nodes depending on caller's choice.
11544 return utils.CommaJoin(nodes)
11549 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11550 """Unpacks the result of change-group and node-evacuate iallocator requests.
11552 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11553 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11555 @type lu: L{LogicalUnit}
11556 @param lu: Logical unit instance
11557 @type alloc_result: tuple/list
11558 @param alloc_result: Result from iallocator
11559 @type early_release: bool
11560 @param early_release: Whether to release locks early if possible
11561 @type use_nodes: bool
11562 @param use_nodes: Whether to display node names instead of groups
11565 (moved, failed, jobs) = alloc_result
11568 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11569 for (name, reason) in failed)
11570 lu.LogWarning("Unable to evacuate instances %s", failreason)
11571 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11574 lu.LogInfo("Instances to be moved: %s",
11575 utils.CommaJoin("%s (to %s)" %
11576 (name, _NodeEvacDest(use_nodes, group, nodes))
11577 for (name, group, nodes) in moved))
11579 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11580 map(opcodes.OpCode.LoadOpCode, ops))
11584 class LUInstanceGrowDisk(LogicalUnit):
11585 """Grow a disk of an instance.
11588 HPATH = "disk-grow"
11589 HTYPE = constants.HTYPE_INSTANCE
11592 def ExpandNames(self):
11593 self._ExpandAndLockInstance()
11594 self.needed_locks[locking.LEVEL_NODE] = []
11595 self.needed_locks[locking.LEVEL_NODE_RES] = []
11596 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11597 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11599 def DeclareLocks(self, level):
11600 if level == locking.LEVEL_NODE:
11601 self._LockInstancesNodes()
11602 elif level == locking.LEVEL_NODE_RES:
11604 self.needed_locks[locking.LEVEL_NODE_RES] = \
11605 self.needed_locks[locking.LEVEL_NODE][:]
11607 def BuildHooksEnv(self):
11608 """Build hooks env.
11610 This runs on the master, the primary and all the secondaries.
11614 "DISK": self.op.disk,
11615 "AMOUNT": self.op.amount,
11616 "ABSOLUTE": self.op.absolute,
11618 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11621 def BuildHooksNodes(self):
11622 """Build hooks nodes.
11625 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11628 def CheckPrereq(self):
11629 """Check prerequisites.
11631 This checks that the instance is in the cluster.
11634 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11635 assert instance is not None, \
11636 "Cannot retrieve locked instance %s" % self.op.instance_name
11637 nodenames = list(instance.all_nodes)
11638 for node in nodenames:
11639 _CheckNodeOnline(self, node)
11641 self.instance = instance
11643 if instance.disk_template not in constants.DTS_GROWABLE:
11644 raise errors.OpPrereqError("Instance's disk layout does not support"
11645 " growing", errors.ECODE_INVAL)
11647 self.disk = instance.FindDisk(self.op.disk)
11649 if self.op.absolute:
11650 self.target = self.op.amount
11651 self.delta = self.target - self.disk.size
11653 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11654 "current disk size (%s)" %
11655 (utils.FormatUnit(self.target, "h"),
11656 utils.FormatUnit(self.disk.size, "h")),
11657 errors.ECODE_STATE)
11659 self.delta = self.op.amount
11660 self.target = self.disk.size + self.delta
11662 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11663 utils.FormatUnit(self.delta, "h"),
11664 errors.ECODE_INVAL)
11666 if instance.disk_template not in (constants.DT_FILE,
11667 constants.DT_SHARED_FILE,
11669 # TODO: check the free disk space for file, when that feature will be
11671 _CheckNodesFreeDiskPerVG(self, nodenames,
11672 self.disk.ComputeGrowth(self.delta))
11674 def Exec(self, feedback_fn):
11675 """Execute disk grow.
11678 instance = self.instance
11681 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11682 assert (self.owned_locks(locking.LEVEL_NODE) ==
11683 self.owned_locks(locking.LEVEL_NODE_RES))
11685 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11687 raise errors.OpExecError("Cannot activate block device to grow")
11689 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11690 (self.op.disk, instance.name,
11691 utils.FormatUnit(self.delta, "h"),
11692 utils.FormatUnit(self.target, "h")))
11694 # First run all grow ops in dry-run mode
11695 for node in instance.all_nodes:
11696 self.cfg.SetDiskID(disk, node)
11697 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11699 result.Raise("Grow request failed to node %s" % node)
11701 # We know that (as far as we can test) operations across different
11702 # nodes will succeed, time to run it for real on the backing storage
11703 for node in instance.all_nodes:
11704 self.cfg.SetDiskID(disk, node)
11705 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11707 result.Raise("Grow request failed to node %s" % node)
11709 # And now execute it for logical storage, on the primary node
11710 node = instance.primary_node
11711 self.cfg.SetDiskID(disk, node)
11712 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11714 result.Raise("Grow request failed to node %s" % node)
11716 disk.RecordGrow(self.delta)
11717 self.cfg.Update(instance, feedback_fn)
11719 # Changes have been recorded, release node lock
11720 _ReleaseLocks(self, locking.LEVEL_NODE)
11722 # Downgrade lock while waiting for sync
11723 self.glm.downgrade(locking.LEVEL_INSTANCE)
11725 if self.op.wait_for_sync:
11726 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11728 self.proc.LogWarning("Disk sync-ing has not returned a good"
11729 " status; please check the instance")
11730 if instance.admin_state != constants.ADMINST_UP:
11731 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11732 elif instance.admin_state != constants.ADMINST_UP:
11733 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11734 " not supposed to be running because no wait for"
11735 " sync mode was requested")
11737 assert self.owned_locks(locking.LEVEL_NODE_RES)
11738 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11741 class LUInstanceQueryData(NoHooksLU):
11742 """Query runtime instance data.
11747 def ExpandNames(self):
11748 self.needed_locks = {}
11750 # Use locking if requested or when non-static information is wanted
11751 if not (self.op.static or self.op.use_locking):
11752 self.LogWarning("Non-static data requested, locks need to be acquired")
11753 self.op.use_locking = True
11755 if self.op.instances or not self.op.use_locking:
11756 # Expand instance names right here
11757 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11759 # Will use acquired locks
11760 self.wanted_names = None
11762 if self.op.use_locking:
11763 self.share_locks = _ShareAll()
11765 if self.wanted_names is None:
11766 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11768 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11770 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11771 self.needed_locks[locking.LEVEL_NODE] = []
11772 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11774 def DeclareLocks(self, level):
11775 if self.op.use_locking:
11776 if level == locking.LEVEL_NODEGROUP:
11777 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11779 # Lock all groups used by instances optimistically; this requires going
11780 # via the node before it's locked, requiring verification later on
11781 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11782 frozenset(group_uuid
11783 for instance_name in owned_instances
11785 self.cfg.GetInstanceNodeGroups(instance_name))
11787 elif level == locking.LEVEL_NODE:
11788 self._LockInstancesNodes()
11790 def CheckPrereq(self):
11791 """Check prerequisites.
11793 This only checks the optional instance list against the existing names.
11796 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11797 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11798 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11800 if self.wanted_names is None:
11801 assert self.op.use_locking, "Locking was not used"
11802 self.wanted_names = owned_instances
11804 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11806 if self.op.use_locking:
11807 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11810 assert not (owned_instances or owned_groups or owned_nodes)
11812 self.wanted_instances = instances.values()
11814 def _ComputeBlockdevStatus(self, node, instance, dev):
11815 """Returns the status of a block device
11818 if self.op.static or not node:
11821 self.cfg.SetDiskID(dev, node)
11823 result = self.rpc.call_blockdev_find(node, dev)
11827 result.Raise("Can't compute disk status for %s" % instance.name)
11829 status = result.payload
11833 return (status.dev_path, status.major, status.minor,
11834 status.sync_percent, status.estimated_time,
11835 status.is_degraded, status.ldisk_status)
11837 def _ComputeDiskStatus(self, instance, snode, dev):
11838 """Compute block device status.
11841 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11843 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11845 def _ComputeDiskStatusInner(self, instance, snode, dev):
11846 """Compute block device status.
11848 @attention: The device has to be annotated already.
11851 if dev.dev_type in constants.LDS_DRBD:
11852 # we change the snode then (otherwise we use the one passed in)
11853 if dev.logical_id[0] == instance.primary_node:
11854 snode = dev.logical_id[1]
11856 snode = dev.logical_id[0]
11858 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11860 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11863 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11870 "iv_name": dev.iv_name,
11871 "dev_type": dev.dev_type,
11872 "logical_id": dev.logical_id,
11873 "physical_id": dev.physical_id,
11874 "pstatus": dev_pstatus,
11875 "sstatus": dev_sstatus,
11876 "children": dev_children,
11881 def Exec(self, feedback_fn):
11882 """Gather and return data"""
11885 cluster = self.cfg.GetClusterInfo()
11887 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11888 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11890 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11891 for node in nodes.values()))
11893 group2name_fn = lambda uuid: groups[uuid].name
11895 for instance in self.wanted_instances:
11896 pnode = nodes[instance.primary_node]
11898 if self.op.static or pnode.offline:
11899 remote_state = None
11901 self.LogWarning("Primary node %s is marked offline, returning static"
11902 " information only for instance %s" %
11903 (pnode.name, instance.name))
11905 remote_info = self.rpc.call_instance_info(instance.primary_node,
11907 instance.hypervisor)
11908 remote_info.Raise("Error checking node %s" % instance.primary_node)
11909 remote_info = remote_info.payload
11910 if remote_info and "state" in remote_info:
11911 remote_state = "up"
11913 if instance.admin_state == constants.ADMINST_UP:
11914 remote_state = "down"
11916 remote_state = instance.admin_state
11918 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11921 snodes_group_uuids = [nodes[snode_name].group
11922 for snode_name in instance.secondary_nodes]
11924 result[instance.name] = {
11925 "name": instance.name,
11926 "config_state": instance.admin_state,
11927 "run_state": remote_state,
11928 "pnode": instance.primary_node,
11929 "pnode_group_uuid": pnode.group,
11930 "pnode_group_name": group2name_fn(pnode.group),
11931 "snodes": instance.secondary_nodes,
11932 "snodes_group_uuids": snodes_group_uuids,
11933 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11935 # this happens to be the same format used for hooks
11936 "nics": _NICListToTuple(self, instance.nics),
11937 "disk_template": instance.disk_template,
11939 "hypervisor": instance.hypervisor,
11940 "network_port": instance.network_port,
11941 "hv_instance": instance.hvparams,
11942 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11943 "be_instance": instance.beparams,
11944 "be_actual": cluster.FillBE(instance),
11945 "os_instance": instance.osparams,
11946 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11947 "serial_no": instance.serial_no,
11948 "mtime": instance.mtime,
11949 "ctime": instance.ctime,
11950 "uuid": instance.uuid,
11956 def PrepareContainerMods(mods, private_fn):
11957 """Prepares a list of container modifications by adding a private data field.
11959 @type mods: list of tuples; (operation, index, parameters)
11960 @param mods: List of modifications
11961 @type private_fn: callable or None
11962 @param private_fn: Callable for constructing a private data field for a
11967 if private_fn is None:
11972 return [(op, idx, params, fn()) for (op, idx, params) in mods]
11975 #: Type description for changes as returned by L{ApplyContainerMods}'s
11977 _TApplyContModsCbChanges = \
11978 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11979 ht.TNonEmptyString,
11984 def ApplyContainerMods(kind, container, chgdesc, mods,
11985 create_fn, modify_fn, remove_fn):
11986 """Applies descriptions in C{mods} to C{container}.
11989 @param kind: One-word item description
11990 @type container: list
11991 @param container: Container to modify
11992 @type chgdesc: None or list
11993 @param chgdesc: List of applied changes
11995 @param mods: Modifications as returned by L{PrepareContainerMods}
11996 @type create_fn: callable
11997 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11998 receives absolute item index, parameters and private data object as added
11999 by L{PrepareContainerMods}, returns tuple containing new item and changes
12001 @type modify_fn: callable
12002 @param modify_fn: Callback for modifying an existing item
12003 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12004 and private data object as added by L{PrepareContainerMods}, returns
12006 @type remove_fn: callable
12007 @param remove_fn: Callback on removing item; receives absolute item index,
12008 item and private data object as added by L{PrepareContainerMods}
12011 for (op, idx, params, private) in mods:
12014 absidx = len(container) - 1
12016 raise IndexError("Not accepting negative indices other than -1")
12017 elif idx > len(container):
12018 raise IndexError("Got %s index %s, but there are only %s" %
12019 (kind, idx, len(container)))
12025 if op == constants.DDM_ADD:
12026 # Calculate where item will be added
12028 addidx = len(container)
12032 if create_fn is None:
12035 (item, changes) = create_fn(addidx, params, private)
12038 container.append(item)
12041 assert idx <= len(container)
12042 # list.insert does so before the specified index
12043 container.insert(idx, item)
12045 # Retrieve existing item
12047 item = container[absidx]
12049 raise IndexError("Invalid %s index %s" % (kind, idx))
12051 if op == constants.DDM_REMOVE:
12054 if remove_fn is not None:
12055 remove_fn(absidx, item, private)
12057 changes = [("%s/%s" % (kind, absidx), "remove")]
12059 assert container[absidx] == item
12060 del container[absidx]
12061 elif op == constants.DDM_MODIFY:
12062 if modify_fn is not None:
12063 changes = modify_fn(absidx, item, params, private)
12065 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12067 assert _TApplyContModsCbChanges(changes)
12069 if not (chgdesc is None or changes is None):
12070 chgdesc.extend(changes)
12073 def _UpdateIvNames(base_index, disks):
12074 """Updates the C{iv_name} attribute of disks.
12076 @type disks: list of L{objects.Disk}
12079 for (idx, disk) in enumerate(disks):
12080 disk.iv_name = "disk/%s" % (base_index + idx, )
12083 class _InstNicModPrivate:
12084 """Data structure for network interface modifications.
12086 Used by L{LUInstanceSetParams}.
12089 def __init__(self):
12094 class LUInstanceSetParams(LogicalUnit):
12095 """Modifies an instances's parameters.
12098 HPATH = "instance-modify"
12099 HTYPE = constants.HTYPE_INSTANCE
12103 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12104 assert ht.TList(mods)
12105 assert not mods or len(mods[0]) in (2, 3)
12107 if mods and len(mods[0]) == 2:
12111 for op, params in mods:
12112 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12113 result.append((op, -1, params))
12117 raise errors.OpPrereqError("Only one %s add or remove operation is"
12118 " supported at a time" % kind,
12119 errors.ECODE_INVAL)
12121 result.append((constants.DDM_MODIFY, op, params))
12123 assert verify_fn(result)
12130 def _CheckMods(kind, mods, key_types, item_fn):
12131 """Ensures requested disk/NIC modifications are valid.
12134 for (op, _, params) in mods:
12135 assert ht.TDict(params)
12137 utils.ForceDictType(params, key_types)
12139 if op == constants.DDM_REMOVE:
12141 raise errors.OpPrereqError("No settings should be passed when"
12142 " removing a %s" % kind,
12143 errors.ECODE_INVAL)
12144 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12145 item_fn(op, params)
12147 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12150 def _VerifyDiskModification(op, params):
12151 """Verifies a disk modification.
12154 if op == constants.DDM_ADD:
12155 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12156 if mode not in constants.DISK_ACCESS_SET:
12157 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12158 errors.ECODE_INVAL)
12160 size = params.get(constants.IDISK_SIZE, None)
12162 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12163 constants.IDISK_SIZE, errors.ECODE_INVAL)
12167 except (TypeError, ValueError), err:
12168 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12169 errors.ECODE_INVAL)
12171 params[constants.IDISK_SIZE] = size
12173 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12174 raise errors.OpPrereqError("Disk size change not possible, use"
12175 " grow-disk", errors.ECODE_INVAL)
12178 def _VerifyNicModification(op, params):
12179 """Verifies a network interface modification.
12182 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12183 ip = params.get(constants.INIC_IP, None)
12186 elif ip.lower() == constants.VALUE_NONE:
12187 params[constants.INIC_IP] = None
12188 elif not netutils.IPAddress.IsValid(ip):
12189 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12190 errors.ECODE_INVAL)
12192 bridge = params.get("bridge", None)
12193 link = params.get(constants.INIC_LINK, None)
12194 if bridge and link:
12195 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12196 " at the same time", errors.ECODE_INVAL)
12197 elif bridge and bridge.lower() == constants.VALUE_NONE:
12198 params["bridge"] = None
12199 elif link and link.lower() == constants.VALUE_NONE:
12200 params[constants.INIC_LINK] = None
12202 if op == constants.DDM_ADD:
12203 macaddr = params.get(constants.INIC_MAC, None)
12204 if macaddr is None:
12205 params[constants.INIC_MAC] = constants.VALUE_AUTO
12207 if constants.INIC_MAC in params:
12208 macaddr = params[constants.INIC_MAC]
12209 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12210 macaddr = utils.NormalizeAndValidateMac(macaddr)
12212 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12213 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12214 " modifying an existing NIC",
12215 errors.ECODE_INVAL)
12217 def CheckArguments(self):
12218 if not (self.op.nics or self.op.disks or self.op.disk_template or
12219 self.op.hvparams or self.op.beparams or self.op.os_name or
12220 self.op.offline is not None or self.op.runtime_mem):
12221 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12223 if self.op.hvparams:
12224 _CheckGlobalHvParams(self.op.hvparams)
12227 self._UpgradeDiskNicMods("disk", self.op.disks,
12228 opcodes.OpInstanceSetParams.TestDiskModifications)
12230 self._UpgradeDiskNicMods("NIC", self.op.nics,
12231 opcodes.OpInstanceSetParams.TestNicModifications)
12233 # Check disk modifications
12234 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12235 self._VerifyDiskModification)
12237 if self.op.disks and self.op.disk_template is not None:
12238 raise errors.OpPrereqError("Disk template conversion and other disk"
12239 " changes not supported at the same time",
12240 errors.ECODE_INVAL)
12242 if (self.op.disk_template and
12243 self.op.disk_template in constants.DTS_INT_MIRROR and
12244 self.op.remote_node is None):
12245 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12246 " one requires specifying a secondary node",
12247 errors.ECODE_INVAL)
12249 # Check NIC modifications
12250 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12251 self._VerifyNicModification)
12253 def ExpandNames(self):
12254 self._ExpandAndLockInstance()
12255 # Can't even acquire node locks in shared mode as upcoming changes in
12256 # Ganeti 2.6 will start to modify the node object on disk conversion
12257 self.needed_locks[locking.LEVEL_NODE] = []
12258 self.needed_locks[locking.LEVEL_NODE_RES] = []
12259 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12261 def DeclareLocks(self, level):
12262 # TODO: Acquire group lock in shared mode (disk parameters)
12263 if level == locking.LEVEL_NODE:
12264 self._LockInstancesNodes()
12265 if self.op.disk_template and self.op.remote_node:
12266 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12267 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12268 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12270 self.needed_locks[locking.LEVEL_NODE_RES] = \
12271 self.needed_locks[locking.LEVEL_NODE][:]
12273 def BuildHooksEnv(self):
12274 """Build hooks env.
12276 This runs on the master, primary and secondaries.
12280 if constants.BE_MINMEM in self.be_new:
12281 args["minmem"] = self.be_new[constants.BE_MINMEM]
12282 if constants.BE_MAXMEM in self.be_new:
12283 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12284 if constants.BE_VCPUS in self.be_new:
12285 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12286 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12287 # information at all.
12289 if self._new_nics is not None:
12292 for nic in self._new_nics:
12293 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12294 mode = nicparams[constants.NIC_MODE]
12295 link = nicparams[constants.NIC_LINK]
12296 nics.append((nic.ip, nic.mac, mode, link))
12298 args["nics"] = nics
12300 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12301 if self.op.disk_template:
12302 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12303 if self.op.runtime_mem:
12304 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12308 def BuildHooksNodes(self):
12309 """Build hooks nodes.
12312 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12315 def _PrepareNicModification(self, params, private, old_ip, old_params,
12317 update_params_dict = dict([(key, params[key])
12318 for key in constants.NICS_PARAMETERS
12321 if "bridge" in params:
12322 update_params_dict[constants.NIC_LINK] = params["bridge"]
12324 new_params = _GetUpdatedParams(old_params, update_params_dict)
12325 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12327 new_filled_params = cluster.SimpleFillNIC(new_params)
12328 objects.NIC.CheckParameterSyntax(new_filled_params)
12330 new_mode = new_filled_params[constants.NIC_MODE]
12331 if new_mode == constants.NIC_MODE_BRIDGED:
12332 bridge = new_filled_params[constants.NIC_LINK]
12333 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12335 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12337 self.warn.append(msg)
12339 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12341 elif new_mode == constants.NIC_MODE_ROUTED:
12342 ip = params.get(constants.INIC_IP, old_ip)
12344 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12345 " on a routed NIC", errors.ECODE_INVAL)
12347 if constants.INIC_MAC in params:
12348 mac = params[constants.INIC_MAC]
12350 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12351 errors.ECODE_INVAL)
12352 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12353 # otherwise generate the MAC address
12354 params[constants.INIC_MAC] = \
12355 self.cfg.GenerateMAC(self.proc.GetECId())
12357 # or validate/reserve the current one
12359 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12360 except errors.ReservationError:
12361 raise errors.OpPrereqError("MAC address '%s' already in use"
12362 " in cluster" % mac,
12363 errors.ECODE_NOTUNIQUE)
12365 private.params = new_params
12366 private.filled = new_filled_params
12368 def CheckPrereq(self):
12369 """Check prerequisites.
12371 This only checks the instance list against the existing names.
12374 # checking the new params on the primary/secondary nodes
12376 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12377 cluster = self.cluster = self.cfg.GetClusterInfo()
12378 assert self.instance is not None, \
12379 "Cannot retrieve locked instance %s" % self.op.instance_name
12380 pnode = instance.primary_node
12381 nodelist = list(instance.all_nodes)
12382 pnode_info = self.cfg.GetNodeInfo(pnode)
12383 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12385 # Prepare disk/NIC modifications
12386 self.diskmod = PrepareContainerMods(self.op.disks, None)
12387 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12390 if self.op.os_name and not self.op.force:
12391 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12392 self.op.force_variant)
12393 instance_os = self.op.os_name
12395 instance_os = instance.os
12397 assert not (self.op.disk_template and self.op.disks), \
12398 "Can't modify disk template and apply disk changes at the same time"
12400 if self.op.disk_template:
12401 if instance.disk_template == self.op.disk_template:
12402 raise errors.OpPrereqError("Instance already has disk template %s" %
12403 instance.disk_template, errors.ECODE_INVAL)
12405 if (instance.disk_template,
12406 self.op.disk_template) not in self._DISK_CONVERSIONS:
12407 raise errors.OpPrereqError("Unsupported disk template conversion from"
12408 " %s to %s" % (instance.disk_template,
12409 self.op.disk_template),
12410 errors.ECODE_INVAL)
12411 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12412 msg="cannot change disk template")
12413 if self.op.disk_template in constants.DTS_INT_MIRROR:
12414 if self.op.remote_node == pnode:
12415 raise errors.OpPrereqError("Given new secondary node %s is the same"
12416 " as the primary node of the instance" %
12417 self.op.remote_node, errors.ECODE_STATE)
12418 _CheckNodeOnline(self, self.op.remote_node)
12419 _CheckNodeNotDrained(self, self.op.remote_node)
12420 # FIXME: here we assume that the old instance type is DT_PLAIN
12421 assert instance.disk_template == constants.DT_PLAIN
12422 disks = [{constants.IDISK_SIZE: d.size,
12423 constants.IDISK_VG: d.logical_id[0]}
12424 for d in instance.disks]
12425 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12426 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12428 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12429 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12430 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12431 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12432 ignore=self.op.ignore_ipolicy)
12433 if pnode_info.group != snode_info.group:
12434 self.LogWarning("The primary and secondary nodes are in two"
12435 " different node groups; the disk parameters"
12436 " from the first disk's node group will be"
12439 # hvparams processing
12440 if self.op.hvparams:
12441 hv_type = instance.hypervisor
12442 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12443 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12444 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12447 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12448 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12449 self.hv_proposed = self.hv_new = hv_new # the new actual values
12450 self.hv_inst = i_hvdict # the new dict (without defaults)
12452 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12454 self.hv_new = self.hv_inst = {}
12456 # beparams processing
12457 if self.op.beparams:
12458 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12460 objects.UpgradeBeParams(i_bedict)
12461 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12462 be_new = cluster.SimpleFillBE(i_bedict)
12463 self.be_proposed = self.be_new = be_new # the new actual values
12464 self.be_inst = i_bedict # the new dict (without defaults)
12466 self.be_new = self.be_inst = {}
12467 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12468 be_old = cluster.FillBE(instance)
12470 # CPU param validation -- checking every time a parameter is
12471 # changed to cover all cases where either CPU mask or vcpus have
12473 if (constants.BE_VCPUS in self.be_proposed and
12474 constants.HV_CPU_MASK in self.hv_proposed):
12476 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12477 # Verify mask is consistent with number of vCPUs. Can skip this
12478 # test if only 1 entry in the CPU mask, which means same mask
12479 # is applied to all vCPUs.
12480 if (len(cpu_list) > 1 and
12481 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12482 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12484 (self.be_proposed[constants.BE_VCPUS],
12485 self.hv_proposed[constants.HV_CPU_MASK]),
12486 errors.ECODE_INVAL)
12488 # Only perform this test if a new CPU mask is given
12489 if constants.HV_CPU_MASK in self.hv_new:
12490 # Calculate the largest CPU number requested
12491 max_requested_cpu = max(map(max, cpu_list))
12492 # Check that all of the instance's nodes have enough physical CPUs to
12493 # satisfy the requested CPU mask
12494 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12495 max_requested_cpu + 1, instance.hypervisor)
12497 # osparams processing
12498 if self.op.osparams:
12499 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12500 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12501 self.os_inst = i_osdict # the new dict (without defaults)
12507 #TODO(dynmem): do the appropriate check involving MINMEM
12508 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12509 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12510 mem_check_list = [pnode]
12511 if be_new[constants.BE_AUTO_BALANCE]:
12512 # either we changed auto_balance to yes or it was from before
12513 mem_check_list.extend(instance.secondary_nodes)
12514 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12515 instance.hypervisor)
12516 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12517 [instance.hypervisor])
12518 pninfo = nodeinfo[pnode]
12519 msg = pninfo.fail_msg
12521 # Assume the primary node is unreachable and go ahead
12522 self.warn.append("Can't get info from primary node %s: %s" %
12525 (_, _, (pnhvinfo, )) = pninfo.payload
12526 if not isinstance(pnhvinfo.get("memory_free", None), int):
12527 self.warn.append("Node data from primary node %s doesn't contain"
12528 " free memory information" % pnode)
12529 elif instance_info.fail_msg:
12530 self.warn.append("Can't get instance runtime information: %s" %
12531 instance_info.fail_msg)
12533 if instance_info.payload:
12534 current_mem = int(instance_info.payload["memory"])
12536 # Assume instance not running
12537 # (there is a slight race condition here, but it's not very
12538 # probable, and we have no other way to check)
12539 # TODO: Describe race condition
12541 #TODO(dynmem): do the appropriate check involving MINMEM
12542 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12543 pnhvinfo["memory_free"])
12545 raise errors.OpPrereqError("This change will prevent the instance"
12546 " from starting, due to %d MB of memory"
12547 " missing on its primary node" %
12549 errors.ECODE_NORES)
12551 if be_new[constants.BE_AUTO_BALANCE]:
12552 for node, nres in nodeinfo.items():
12553 if node not in instance.secondary_nodes:
12555 nres.Raise("Can't get info from secondary node %s" % node,
12556 prereq=True, ecode=errors.ECODE_STATE)
12557 (_, _, (nhvinfo, )) = nres.payload
12558 if not isinstance(nhvinfo.get("memory_free", None), int):
12559 raise errors.OpPrereqError("Secondary node %s didn't return free"
12560 " memory information" % node,
12561 errors.ECODE_STATE)
12562 #TODO(dynmem): do the appropriate check involving MINMEM
12563 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12564 raise errors.OpPrereqError("This change will prevent the instance"
12565 " from failover to its secondary node"
12566 " %s, due to not enough memory" % node,
12567 errors.ECODE_STATE)
12569 if self.op.runtime_mem:
12570 remote_info = self.rpc.call_instance_info(instance.primary_node,
12572 instance.hypervisor)
12573 remote_info.Raise("Error checking node %s" % instance.primary_node)
12574 if not remote_info.payload: # not running already
12575 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12576 errors.ECODE_STATE)
12578 current_memory = remote_info.payload["memory"]
12579 if (not self.op.force and
12580 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12581 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12582 raise errors.OpPrereqError("Instance %s must have memory between %d"
12583 " and %d MB of memory unless --force is"
12584 " given" % (instance.name,
12585 self.be_proposed[constants.BE_MINMEM],
12586 self.be_proposed[constants.BE_MAXMEM]),
12587 errors.ECODE_INVAL)
12589 if self.op.runtime_mem > current_memory:
12590 _CheckNodeFreeMemory(self, instance.primary_node,
12591 "ballooning memory for instance %s" %
12593 self.op.memory - current_memory,
12594 instance.hypervisor)
12596 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12597 raise errors.OpPrereqError("Disk operations not supported for"
12598 " diskless instances",
12599 errors.ECODE_INVAL)
12601 def _PrepareNicCreate(_, params, private):
12602 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12603 return (None, None)
12605 def _PrepareNicMod(_, nic, params, private):
12606 self._PrepareNicModification(params, private, nic.ip,
12607 nic.nicparams, cluster, pnode)
12610 # Verify NIC changes (operating on copy)
12611 nics = instance.nics[:]
12612 ApplyContainerMods("NIC", nics, None, self.nicmod,
12613 _PrepareNicCreate, _PrepareNicMod, None)
12614 if len(nics) > constants.MAX_NICS:
12615 raise errors.OpPrereqError("Instance has too many network interfaces"
12616 " (%d), cannot add more" % constants.MAX_NICS,
12617 errors.ECODE_STATE)
12619 # Verify disk changes (operating on a copy)
12620 disks = instance.disks[:]
12621 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12622 if len(disks) > constants.MAX_DISKS:
12623 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12624 " more" % constants.MAX_DISKS,
12625 errors.ECODE_STATE)
12627 if self.op.offline is not None:
12628 if self.op.offline:
12629 msg = "can't change to offline"
12631 msg = "can't change to online"
12632 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12634 # Pre-compute NIC changes (necessary to use result in hooks)
12635 self._nic_chgdesc = []
12637 # Operate on copies as this is still in prereq
12638 nics = [nic.Copy() for nic in instance.nics]
12639 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12640 self._CreateNewNic, self._ApplyNicMods, None)
12641 self._new_nics = nics
12643 self._new_nics = None
12645 def _ConvertPlainToDrbd(self, feedback_fn):
12646 """Converts an instance from plain to drbd.
12649 feedback_fn("Converting template to drbd")
12650 instance = self.instance
12651 pnode = instance.primary_node
12652 snode = self.op.remote_node
12654 assert instance.disk_template == constants.DT_PLAIN
12656 # create a fake disk info for _GenerateDiskTemplate
12657 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12658 constants.IDISK_VG: d.logical_id[0]}
12659 for d in instance.disks]
12660 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12661 instance.name, pnode, [snode],
12662 disk_info, None, None, 0, feedback_fn,
12664 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12666 info = _GetInstanceInfoText(instance)
12667 feedback_fn("Creating additional volumes...")
12668 # first, create the missing data and meta devices
12669 for disk in anno_disks:
12670 # unfortunately this is... not too nice
12671 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12673 for child in disk.children:
12674 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12675 # at this stage, all new LVs have been created, we can rename the
12677 feedback_fn("Renaming original volumes...")
12678 rename_list = [(o, n.children[0].logical_id)
12679 for (o, n) in zip(instance.disks, new_disks)]
12680 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12681 result.Raise("Failed to rename original LVs")
12683 feedback_fn("Initializing DRBD devices...")
12684 # all child devices are in place, we can now create the DRBD devices
12685 for disk in anno_disks:
12686 for node in [pnode, snode]:
12687 f_create = node == pnode
12688 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12690 # at this point, the instance has been modified
12691 instance.disk_template = constants.DT_DRBD8
12692 instance.disks = new_disks
12693 self.cfg.Update(instance, feedback_fn)
12695 # Release node locks while waiting for sync
12696 _ReleaseLocks(self, locking.LEVEL_NODE)
12698 # disks are created, waiting for sync
12699 disk_abort = not _WaitForSync(self, instance,
12700 oneshot=not self.op.wait_for_sync)
12702 raise errors.OpExecError("There are some degraded disks for"
12703 " this instance, please cleanup manually")
12705 # Node resource locks will be released by caller
12707 def _ConvertDrbdToPlain(self, feedback_fn):
12708 """Converts an instance from drbd to plain.
12711 instance = self.instance
12713 assert len(instance.secondary_nodes) == 1
12714 assert instance.disk_template == constants.DT_DRBD8
12716 pnode = instance.primary_node
12717 snode = instance.secondary_nodes[0]
12718 feedback_fn("Converting template to plain")
12720 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12721 new_disks = [d.children[0] for d in instance.disks]
12723 # copy over size and mode
12724 for parent, child in zip(old_disks, new_disks):
12725 child.size = parent.size
12726 child.mode = parent.mode
12728 # this is a DRBD disk, return its port to the pool
12729 # NOTE: this must be done right before the call to cfg.Update!
12730 for disk in old_disks:
12731 tcp_port = disk.logical_id[2]
12732 self.cfg.AddTcpUdpPort(tcp_port)
12734 # update instance structure
12735 instance.disks = new_disks
12736 instance.disk_template = constants.DT_PLAIN
12737 self.cfg.Update(instance, feedback_fn)
12739 # Release locks in case removing disks takes a while
12740 _ReleaseLocks(self, locking.LEVEL_NODE)
12742 feedback_fn("Removing volumes on the secondary node...")
12743 for disk in old_disks:
12744 self.cfg.SetDiskID(disk, snode)
12745 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12747 self.LogWarning("Could not remove block device %s on node %s,"
12748 " continuing anyway: %s", disk.iv_name, snode, msg)
12750 feedback_fn("Removing unneeded volumes on the primary node...")
12751 for idx, disk in enumerate(old_disks):
12752 meta = disk.children[1]
12753 self.cfg.SetDiskID(meta, pnode)
12754 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12756 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12757 " continuing anyway: %s", idx, pnode, msg)
12759 def _CreateNewDisk(self, idx, params, _):
12760 """Creates a new disk.
12763 instance = self.instance
12766 if instance.disk_template in constants.DTS_FILEBASED:
12767 (file_driver, file_path) = instance.disks[0].logical_id
12768 file_path = os.path.dirname(file_path)
12770 file_driver = file_path = None
12773 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12774 instance.primary_node, instance.secondary_nodes,
12775 [params], file_path, file_driver, idx,
12776 self.Log, self.diskparams)[0]
12778 info = _GetInstanceInfoText(instance)
12780 logging.info("Creating volume %s for instance %s",
12781 disk.iv_name, instance.name)
12782 # Note: this needs to be kept in sync with _CreateDisks
12784 for node in instance.all_nodes:
12785 f_create = (node == instance.primary_node)
12787 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12788 except errors.OpExecError, err:
12789 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12790 disk.iv_name, disk, node, err)
12793 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12797 def _ModifyDisk(idx, disk, params, _):
12798 """Modifies a disk.
12801 disk.mode = params[constants.IDISK_MODE]
12804 ("disk.mode/%d" % idx, disk.mode),
12807 def _RemoveDisk(self, idx, root, _):
12811 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12812 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12813 self.cfg.SetDiskID(disk, node)
12814 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12816 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12817 " continuing anyway", idx, node, msg)
12819 # if this is a DRBD disk, return its port to the pool
12820 if root.dev_type in constants.LDS_DRBD:
12821 self.cfg.AddTcpUdpPort(root.logical_id[2])
12824 def _CreateNewNic(idx, params, private):
12825 """Creates data structure for a new network interface.
12828 mac = params[constants.INIC_MAC]
12829 ip = params.get(constants.INIC_IP, None)
12830 nicparams = private.params
12832 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12834 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12835 (mac, ip, private.filled[constants.NIC_MODE],
12836 private.filled[constants.NIC_LINK])),
12840 def _ApplyNicMods(idx, nic, params, private):
12841 """Modifies a network interface.
12846 for key in [constants.INIC_MAC, constants.INIC_IP]:
12848 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12849 setattr(nic, key, params[key])
12852 nic.nicparams = private.params
12854 for (key, val) in params.items():
12855 changes.append(("nic.%s/%d" % (key, idx), val))
12859 def Exec(self, feedback_fn):
12860 """Modifies an instance.
12862 All parameters take effect only at the next restart of the instance.
12865 # Process here the warnings from CheckPrereq, as we don't have a
12866 # feedback_fn there.
12867 # TODO: Replace with self.LogWarning
12868 for warn in self.warn:
12869 feedback_fn("WARNING: %s" % warn)
12871 assert ((self.op.disk_template is None) ^
12872 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12873 "Not owning any node resource locks"
12876 instance = self.instance
12879 if self.op.runtime_mem:
12880 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12882 self.op.runtime_mem)
12883 rpcres.Raise("Cannot modify instance runtime memory")
12884 result.append(("runtime_memory", self.op.runtime_mem))
12886 # Apply disk changes
12887 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12888 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12889 _UpdateIvNames(0, instance.disks)
12891 if self.op.disk_template:
12893 check_nodes = set(instance.all_nodes)
12894 if self.op.remote_node:
12895 check_nodes.add(self.op.remote_node)
12896 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12897 owned = self.owned_locks(level)
12898 assert not (check_nodes - owned), \
12899 ("Not owning the correct locks, owning %r, expected at least %r" %
12900 (owned, check_nodes))
12902 r_shut = _ShutdownInstanceDisks(self, instance)
12904 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12905 " proceed with disk template conversion")
12906 mode = (instance.disk_template, self.op.disk_template)
12908 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12910 self.cfg.ReleaseDRBDMinors(instance.name)
12912 result.append(("disk_template", self.op.disk_template))
12914 assert instance.disk_template == self.op.disk_template, \
12915 ("Expected disk template '%s', found '%s'" %
12916 (self.op.disk_template, instance.disk_template))
12918 # Release node and resource locks if there are any (they might already have
12919 # been released during disk conversion)
12920 _ReleaseLocks(self, locking.LEVEL_NODE)
12921 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12923 # Apply NIC changes
12924 if self._new_nics is not None:
12925 instance.nics = self._new_nics
12926 result.extend(self._nic_chgdesc)
12929 if self.op.hvparams:
12930 instance.hvparams = self.hv_inst
12931 for key, val in self.op.hvparams.iteritems():
12932 result.append(("hv/%s" % key, val))
12935 if self.op.beparams:
12936 instance.beparams = self.be_inst
12937 for key, val in self.op.beparams.iteritems():
12938 result.append(("be/%s" % key, val))
12941 if self.op.os_name:
12942 instance.os = self.op.os_name
12945 if self.op.osparams:
12946 instance.osparams = self.os_inst
12947 for key, val in self.op.osparams.iteritems():
12948 result.append(("os/%s" % key, val))
12950 if self.op.offline is None:
12953 elif self.op.offline:
12954 # Mark instance as offline
12955 self.cfg.MarkInstanceOffline(instance.name)
12956 result.append(("admin_state", constants.ADMINST_OFFLINE))
12958 # Mark instance as online, but stopped
12959 self.cfg.MarkInstanceDown(instance.name)
12960 result.append(("admin_state", constants.ADMINST_DOWN))
12962 self.cfg.Update(instance, feedback_fn)
12964 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12965 self.owned_locks(locking.LEVEL_NODE)), \
12966 "All node locks should have been released by now"
12970 _DISK_CONVERSIONS = {
12971 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12972 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12976 class LUInstanceChangeGroup(LogicalUnit):
12977 HPATH = "instance-change-group"
12978 HTYPE = constants.HTYPE_INSTANCE
12981 def ExpandNames(self):
12982 self.share_locks = _ShareAll()
12983 self.needed_locks = {
12984 locking.LEVEL_NODEGROUP: [],
12985 locking.LEVEL_NODE: [],
12988 self._ExpandAndLockInstance()
12990 if self.op.target_groups:
12991 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12992 self.op.target_groups)
12994 self.req_target_uuids = None
12996 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12998 def DeclareLocks(self, level):
12999 if level == locking.LEVEL_NODEGROUP:
13000 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13002 if self.req_target_uuids:
13003 lock_groups = set(self.req_target_uuids)
13005 # Lock all groups used by instance optimistically; this requires going
13006 # via the node before it's locked, requiring verification later on
13007 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13008 lock_groups.update(instance_groups)
13010 # No target groups, need to lock all of them
13011 lock_groups = locking.ALL_SET
13013 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13015 elif level == locking.LEVEL_NODE:
13016 if self.req_target_uuids:
13017 # Lock all nodes used by instances
13018 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13019 self._LockInstancesNodes()
13021 # Lock all nodes in all potential target groups
13022 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13023 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13024 member_nodes = [node_name
13025 for group in lock_groups
13026 for node_name in self.cfg.GetNodeGroup(group).members]
13027 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13029 # Lock all nodes as all groups are potential targets
13030 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13032 def CheckPrereq(self):
13033 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13034 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13035 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13037 assert (self.req_target_uuids is None or
13038 owned_groups.issuperset(self.req_target_uuids))
13039 assert owned_instances == set([self.op.instance_name])
13041 # Get instance information
13042 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13044 # Check if node groups for locked instance are still correct
13045 assert owned_nodes.issuperset(self.instance.all_nodes), \
13046 ("Instance %s's nodes changed while we kept the lock" %
13047 self.op.instance_name)
13049 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13052 if self.req_target_uuids:
13053 # User requested specific target groups
13054 self.target_uuids = frozenset(self.req_target_uuids)
13056 # All groups except those used by the instance are potential targets
13057 self.target_uuids = owned_groups - inst_groups
13059 conflicting_groups = self.target_uuids & inst_groups
13060 if conflicting_groups:
13061 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13062 " used by the instance '%s'" %
13063 (utils.CommaJoin(conflicting_groups),
13064 self.op.instance_name),
13065 errors.ECODE_INVAL)
13067 if not self.target_uuids:
13068 raise errors.OpPrereqError("There are no possible target groups",
13069 errors.ECODE_INVAL)
13071 def BuildHooksEnv(self):
13072 """Build hooks env.
13075 assert self.target_uuids
13078 "TARGET_GROUPS": " ".join(self.target_uuids),
13081 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13085 def BuildHooksNodes(self):
13086 """Build hooks nodes.
13089 mn = self.cfg.GetMasterNode()
13090 return ([mn], [mn])
13092 def Exec(self, feedback_fn):
13093 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13095 assert instances == [self.op.instance_name], "Instance not locked"
13097 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13098 instances=instances, target_groups=list(self.target_uuids))
13100 ial.Run(self.op.iallocator)
13102 if not ial.success:
13103 raise errors.OpPrereqError("Can't compute solution for changing group of"
13104 " instance '%s' using iallocator '%s': %s" %
13105 (self.op.instance_name, self.op.iallocator,
13107 errors.ECODE_NORES)
13109 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13111 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13112 " instance '%s'", len(jobs), self.op.instance_name)
13114 return ResultWithJobs(jobs)
13117 class LUBackupQuery(NoHooksLU):
13118 """Query the exports list
13123 def CheckArguments(self):
13124 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13125 ["node", "export"], self.op.use_locking)
13127 def ExpandNames(self):
13128 self.expq.ExpandNames(self)
13130 def DeclareLocks(self, level):
13131 self.expq.DeclareLocks(self, level)
13133 def Exec(self, feedback_fn):
13136 for (node, expname) in self.expq.OldStyleQuery(self):
13137 if expname is None:
13138 result[node] = False
13140 result.setdefault(node, []).append(expname)
13145 class _ExportQuery(_QueryBase):
13146 FIELDS = query.EXPORT_FIELDS
13148 #: The node name is not a unique key for this query
13149 SORT_FIELD = "node"
13151 def ExpandNames(self, lu):
13152 lu.needed_locks = {}
13154 # The following variables interact with _QueryBase._GetNames
13156 self.wanted = _GetWantedNodes(lu, self.names)
13158 self.wanted = locking.ALL_SET
13160 self.do_locking = self.use_locking
13162 if self.do_locking:
13163 lu.share_locks = _ShareAll()
13164 lu.needed_locks = {
13165 locking.LEVEL_NODE: self.wanted,
13168 def DeclareLocks(self, lu, level):
13171 def _GetQueryData(self, lu):
13172 """Computes the list of nodes and their attributes.
13175 # Locking is not used
13177 assert not (compat.any(lu.glm.is_owned(level)
13178 for level in locking.LEVELS
13179 if level != locking.LEVEL_CLUSTER) or
13180 self.do_locking or self.use_locking)
13182 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13186 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13188 result.append((node, None))
13190 result.extend((node, expname) for expname in nres.payload)
13195 class LUBackupPrepare(NoHooksLU):
13196 """Prepares an instance for an export and returns useful information.
13201 def ExpandNames(self):
13202 self._ExpandAndLockInstance()
13204 def CheckPrereq(self):
13205 """Check prerequisites.
13208 instance_name = self.op.instance_name
13210 self.instance = self.cfg.GetInstanceInfo(instance_name)
13211 assert self.instance is not None, \
13212 "Cannot retrieve locked instance %s" % self.op.instance_name
13213 _CheckNodeOnline(self, self.instance.primary_node)
13215 self._cds = _GetClusterDomainSecret()
13217 def Exec(self, feedback_fn):
13218 """Prepares an instance for an export.
13221 instance = self.instance
13223 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13224 salt = utils.GenerateSecret(8)
13226 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13227 result = self.rpc.call_x509_cert_create(instance.primary_node,
13228 constants.RIE_CERT_VALIDITY)
13229 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13231 (name, cert_pem) = result.payload
13233 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13237 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13238 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13240 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13246 class LUBackupExport(LogicalUnit):
13247 """Export an instance to an image in the cluster.
13250 HPATH = "instance-export"
13251 HTYPE = constants.HTYPE_INSTANCE
13254 def CheckArguments(self):
13255 """Check the arguments.
13258 self.x509_key_name = self.op.x509_key_name
13259 self.dest_x509_ca_pem = self.op.destination_x509_ca
13261 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13262 if not self.x509_key_name:
13263 raise errors.OpPrereqError("Missing X509 key name for encryption",
13264 errors.ECODE_INVAL)
13266 if not self.dest_x509_ca_pem:
13267 raise errors.OpPrereqError("Missing destination X509 CA",
13268 errors.ECODE_INVAL)
13270 def ExpandNames(self):
13271 self._ExpandAndLockInstance()
13273 # Lock all nodes for local exports
13274 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13275 # FIXME: lock only instance primary and destination node
13277 # Sad but true, for now we have do lock all nodes, as we don't know where
13278 # the previous export might be, and in this LU we search for it and
13279 # remove it from its current node. In the future we could fix this by:
13280 # - making a tasklet to search (share-lock all), then create the
13281 # new one, then one to remove, after
13282 # - removing the removal operation altogether
13283 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13285 def DeclareLocks(self, level):
13286 """Last minute lock declaration."""
13287 # All nodes are locked anyway, so nothing to do here.
13289 def BuildHooksEnv(self):
13290 """Build hooks env.
13292 This will run on the master, primary node and target node.
13296 "EXPORT_MODE": self.op.mode,
13297 "EXPORT_NODE": self.op.target_node,
13298 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13299 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13300 # TODO: Generic function for boolean env variables
13301 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13304 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13308 def BuildHooksNodes(self):
13309 """Build hooks nodes.
13312 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13314 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13315 nl.append(self.op.target_node)
13319 def CheckPrereq(self):
13320 """Check prerequisites.
13322 This checks that the instance and node names are valid.
13325 instance_name = self.op.instance_name
13327 self.instance = self.cfg.GetInstanceInfo(instance_name)
13328 assert self.instance is not None, \
13329 "Cannot retrieve locked instance %s" % self.op.instance_name
13330 _CheckNodeOnline(self, self.instance.primary_node)
13332 if (self.op.remove_instance and
13333 self.instance.admin_state == constants.ADMINST_UP and
13334 not self.op.shutdown):
13335 raise errors.OpPrereqError("Can not remove instance without shutting it"
13338 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13339 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13340 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13341 assert self.dst_node is not None
13343 _CheckNodeOnline(self, self.dst_node.name)
13344 _CheckNodeNotDrained(self, self.dst_node.name)
13347 self.dest_disk_info = None
13348 self.dest_x509_ca = None
13350 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13351 self.dst_node = None
13353 if len(self.op.target_node) != len(self.instance.disks):
13354 raise errors.OpPrereqError(("Received destination information for %s"
13355 " disks, but instance %s has %s disks") %
13356 (len(self.op.target_node), instance_name,
13357 len(self.instance.disks)),
13358 errors.ECODE_INVAL)
13360 cds = _GetClusterDomainSecret()
13362 # Check X509 key name
13364 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13365 except (TypeError, ValueError), err:
13366 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13368 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13369 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13370 errors.ECODE_INVAL)
13372 # Load and verify CA
13374 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13375 except OpenSSL.crypto.Error, err:
13376 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13377 (err, ), errors.ECODE_INVAL)
13379 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13380 if errcode is not None:
13381 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13382 (msg, ), errors.ECODE_INVAL)
13384 self.dest_x509_ca = cert
13386 # Verify target information
13388 for idx, disk_data in enumerate(self.op.target_node):
13390 (host, port, magic) = \
13391 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13392 except errors.GenericError, err:
13393 raise errors.OpPrereqError("Target info for disk %s: %s" %
13394 (idx, err), errors.ECODE_INVAL)
13396 disk_info.append((host, port, magic))
13398 assert len(disk_info) == len(self.op.target_node)
13399 self.dest_disk_info = disk_info
13402 raise errors.ProgrammerError("Unhandled export mode %r" %
13405 # instance disk type verification
13406 # TODO: Implement export support for file-based disks
13407 for disk in self.instance.disks:
13408 if disk.dev_type == constants.LD_FILE:
13409 raise errors.OpPrereqError("Export not supported for instances with"
13410 " file-based disks", errors.ECODE_INVAL)
13412 def _CleanupExports(self, feedback_fn):
13413 """Removes exports of current instance from all other nodes.
13415 If an instance in a cluster with nodes A..D was exported to node C, its
13416 exports will be removed from the nodes A, B and D.
13419 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13421 nodelist = self.cfg.GetNodeList()
13422 nodelist.remove(self.dst_node.name)
13424 # on one-node clusters nodelist will be empty after the removal
13425 # if we proceed the backup would be removed because OpBackupQuery
13426 # substitutes an empty list with the full cluster node list.
13427 iname = self.instance.name
13429 feedback_fn("Removing old exports for instance %s" % iname)
13430 exportlist = self.rpc.call_export_list(nodelist)
13431 for node in exportlist:
13432 if exportlist[node].fail_msg:
13434 if iname in exportlist[node].payload:
13435 msg = self.rpc.call_export_remove(node, iname).fail_msg
13437 self.LogWarning("Could not remove older export for instance %s"
13438 " on node %s: %s", iname, node, msg)
13440 def Exec(self, feedback_fn):
13441 """Export an instance to an image in the cluster.
13444 assert self.op.mode in constants.EXPORT_MODES
13446 instance = self.instance
13447 src_node = instance.primary_node
13449 if self.op.shutdown:
13450 # shutdown the instance, but not the disks
13451 feedback_fn("Shutting down instance %s" % instance.name)
13452 result = self.rpc.call_instance_shutdown(src_node, instance,
13453 self.op.shutdown_timeout)
13454 # TODO: Maybe ignore failures if ignore_remove_failures is set
13455 result.Raise("Could not shutdown instance %s on"
13456 " node %s" % (instance.name, src_node))
13458 # set the disks ID correctly since call_instance_start needs the
13459 # correct drbd minor to create the symlinks
13460 for disk in instance.disks:
13461 self.cfg.SetDiskID(disk, src_node)
13463 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13466 # Activate the instance disks if we'exporting a stopped instance
13467 feedback_fn("Activating disks for %s" % instance.name)
13468 _StartInstanceDisks(self, instance, None)
13471 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13474 helper.CreateSnapshots()
13476 if (self.op.shutdown and
13477 instance.admin_state == constants.ADMINST_UP and
13478 not self.op.remove_instance):
13479 assert not activate_disks
13480 feedback_fn("Starting instance %s" % instance.name)
13481 result = self.rpc.call_instance_start(src_node,
13482 (instance, None, None), False)
13483 msg = result.fail_msg
13485 feedback_fn("Failed to start instance: %s" % msg)
13486 _ShutdownInstanceDisks(self, instance)
13487 raise errors.OpExecError("Could not start instance: %s" % msg)
13489 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13490 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13491 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13492 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13493 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13495 (key_name, _, _) = self.x509_key_name
13498 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13501 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13502 key_name, dest_ca_pem,
13507 # Check for backwards compatibility
13508 assert len(dresults) == len(instance.disks)
13509 assert compat.all(isinstance(i, bool) for i in dresults), \
13510 "Not all results are boolean: %r" % dresults
13514 feedback_fn("Deactivating disks for %s" % instance.name)
13515 _ShutdownInstanceDisks(self, instance)
13517 if not (compat.all(dresults) and fin_resu):
13520 failures.append("export finalization")
13521 if not compat.all(dresults):
13522 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13524 failures.append("disk export: disk(s) %s" % fdsk)
13526 raise errors.OpExecError("Export failed, errors in %s" %
13527 utils.CommaJoin(failures))
13529 # At this point, the export was successful, we can cleanup/finish
13531 # Remove instance if requested
13532 if self.op.remove_instance:
13533 feedback_fn("Removing instance %s" % instance.name)
13534 _RemoveInstance(self, feedback_fn, instance,
13535 self.op.ignore_remove_failures)
13537 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13538 self._CleanupExports(feedback_fn)
13540 return fin_resu, dresults
13543 class LUBackupRemove(NoHooksLU):
13544 """Remove exports related to the named instance.
13549 def ExpandNames(self):
13550 self.needed_locks = {}
13551 # We need all nodes to be locked in order for RemoveExport to work, but we
13552 # don't need to lock the instance itself, as nothing will happen to it (and
13553 # we can remove exports also for a removed instance)
13554 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13556 def Exec(self, feedback_fn):
13557 """Remove any export.
13560 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13561 # If the instance was not found we'll try with the name that was passed in.
13562 # This will only work if it was an FQDN, though.
13564 if not instance_name:
13566 instance_name = self.op.instance_name
13568 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13569 exportlist = self.rpc.call_export_list(locked_nodes)
13571 for node in exportlist:
13572 msg = exportlist[node].fail_msg
13574 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13576 if instance_name in exportlist[node].payload:
13578 result = self.rpc.call_export_remove(node, instance_name)
13579 msg = result.fail_msg
13581 logging.error("Could not remove export for instance %s"
13582 " on node %s: %s", instance_name, node, msg)
13584 if fqdn_warn and not found:
13585 feedback_fn("Export not found. If trying to remove an export belonging"
13586 " to a deleted instance please use its Fully Qualified"
13590 class LUGroupAdd(LogicalUnit):
13591 """Logical unit for creating node groups.
13594 HPATH = "group-add"
13595 HTYPE = constants.HTYPE_GROUP
13598 def ExpandNames(self):
13599 # We need the new group's UUID here so that we can create and acquire the
13600 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13601 # that it should not check whether the UUID exists in the configuration.
13602 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13603 self.needed_locks = {}
13604 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13606 def CheckPrereq(self):
13607 """Check prerequisites.
13609 This checks that the given group name is not an existing node group
13614 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13615 except errors.OpPrereqError:
13618 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13619 " node group (UUID: %s)" %
13620 (self.op.group_name, existing_uuid),
13621 errors.ECODE_EXISTS)
13623 if self.op.ndparams:
13624 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13626 if self.op.hv_state:
13627 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13629 self.new_hv_state = None
13631 if self.op.disk_state:
13632 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13634 self.new_disk_state = None
13636 if self.op.diskparams:
13637 for templ in constants.DISK_TEMPLATES:
13638 if templ in self.op.diskparams:
13639 utils.ForceDictType(self.op.diskparams[templ],
13640 constants.DISK_DT_TYPES)
13641 self.new_diskparams = self.op.diskparams
13643 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13644 except errors.OpPrereqError, err:
13645 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13646 errors.ECODE_INVAL)
13648 self.new_diskparams = {}
13650 if self.op.ipolicy:
13651 cluster = self.cfg.GetClusterInfo()
13652 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13654 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13655 except errors.ConfigurationError, err:
13656 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13657 errors.ECODE_INVAL)
13659 def BuildHooksEnv(self):
13660 """Build hooks env.
13664 "GROUP_NAME": self.op.group_name,
13667 def BuildHooksNodes(self):
13668 """Build hooks nodes.
13671 mn = self.cfg.GetMasterNode()
13672 return ([mn], [mn])
13674 def Exec(self, feedback_fn):
13675 """Add the node group to the cluster.
13678 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13679 uuid=self.group_uuid,
13680 alloc_policy=self.op.alloc_policy,
13681 ndparams=self.op.ndparams,
13682 diskparams=self.new_diskparams,
13683 ipolicy=self.op.ipolicy,
13684 hv_state_static=self.new_hv_state,
13685 disk_state_static=self.new_disk_state)
13687 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13688 del self.remove_locks[locking.LEVEL_NODEGROUP]
13691 class LUGroupAssignNodes(NoHooksLU):
13692 """Logical unit for assigning nodes to groups.
13697 def ExpandNames(self):
13698 # These raise errors.OpPrereqError on their own:
13699 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13700 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13702 # We want to lock all the affected nodes and groups. We have readily
13703 # available the list of nodes, and the *destination* group. To gather the
13704 # list of "source" groups, we need to fetch node information later on.
13705 self.needed_locks = {
13706 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13707 locking.LEVEL_NODE: self.op.nodes,
13710 def DeclareLocks(self, level):
13711 if level == locking.LEVEL_NODEGROUP:
13712 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13714 # Try to get all affected nodes' groups without having the group or node
13715 # lock yet. Needs verification later in the code flow.
13716 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13718 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13720 def CheckPrereq(self):
13721 """Check prerequisites.
13724 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13725 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13726 frozenset(self.op.nodes))
13728 expected_locks = (set([self.group_uuid]) |
13729 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13730 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13731 if actual_locks != expected_locks:
13732 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13733 " current groups are '%s', used to be '%s'" %
13734 (utils.CommaJoin(expected_locks),
13735 utils.CommaJoin(actual_locks)))
13737 self.node_data = self.cfg.GetAllNodesInfo()
13738 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13739 instance_data = self.cfg.GetAllInstancesInfo()
13741 if self.group is None:
13742 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13743 (self.op.group_name, self.group_uuid))
13745 (new_splits, previous_splits) = \
13746 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13747 for node in self.op.nodes],
13748 self.node_data, instance_data)
13751 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13753 if not self.op.force:
13754 raise errors.OpExecError("The following instances get split by this"
13755 " change and --force was not given: %s" %
13758 self.LogWarning("This operation will split the following instances: %s",
13761 if previous_splits:
13762 self.LogWarning("In addition, these already-split instances continue"
13763 " to be split across groups: %s",
13764 utils.CommaJoin(utils.NiceSort(previous_splits)))
13766 def Exec(self, feedback_fn):
13767 """Assign nodes to a new group.
13770 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13772 self.cfg.AssignGroupNodes(mods)
13775 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13776 """Check for split instances after a node assignment.
13778 This method considers a series of node assignments as an atomic operation,
13779 and returns information about split instances after applying the set of
13782 In particular, it returns information about newly split instances, and
13783 instances that were already split, and remain so after the change.
13785 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13788 @type changes: list of (node_name, new_group_uuid) pairs.
13789 @param changes: list of node assignments to consider.
13790 @param node_data: a dict with data for all nodes
13791 @param instance_data: a dict with all instances to consider
13792 @rtype: a two-tuple
13793 @return: a list of instances that were previously okay and result split as a
13794 consequence of this change, and a list of instances that were previously
13795 split and this change does not fix.
13798 changed_nodes = dict((node, group) for node, group in changes
13799 if node_data[node].group != group)
13801 all_split_instances = set()
13802 previously_split_instances = set()
13804 def InstanceNodes(instance):
13805 return [instance.primary_node] + list(instance.secondary_nodes)
13807 for inst in instance_data.values():
13808 if inst.disk_template not in constants.DTS_INT_MIRROR:
13811 instance_nodes = InstanceNodes(inst)
13813 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13814 previously_split_instances.add(inst.name)
13816 if len(set(changed_nodes.get(node, node_data[node].group)
13817 for node in instance_nodes)) > 1:
13818 all_split_instances.add(inst.name)
13820 return (list(all_split_instances - previously_split_instances),
13821 list(previously_split_instances & all_split_instances))
13824 class _GroupQuery(_QueryBase):
13825 FIELDS = query.GROUP_FIELDS
13827 def ExpandNames(self, lu):
13828 lu.needed_locks = {}
13830 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13831 self._cluster = lu.cfg.GetClusterInfo()
13832 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13835 self.wanted = [name_to_uuid[name]
13836 for name in utils.NiceSort(name_to_uuid.keys())]
13838 # Accept names to be either names or UUIDs.
13841 all_uuid = frozenset(self._all_groups.keys())
13843 for name in self.names:
13844 if name in all_uuid:
13845 self.wanted.append(name)
13846 elif name in name_to_uuid:
13847 self.wanted.append(name_to_uuid[name])
13849 missing.append(name)
13852 raise errors.OpPrereqError("Some groups do not exist: %s" %
13853 utils.CommaJoin(missing),
13854 errors.ECODE_NOENT)
13856 def DeclareLocks(self, lu, level):
13859 def _GetQueryData(self, lu):
13860 """Computes the list of node groups and their attributes.
13863 do_nodes = query.GQ_NODE in self.requested_data
13864 do_instances = query.GQ_INST in self.requested_data
13866 group_to_nodes = None
13867 group_to_instances = None
13869 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13870 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13871 # latter GetAllInstancesInfo() is not enough, for we have to go through
13872 # instance->node. Hence, we will need to process nodes even if we only need
13873 # instance information.
13874 if do_nodes or do_instances:
13875 all_nodes = lu.cfg.GetAllNodesInfo()
13876 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13879 for node in all_nodes.values():
13880 if node.group in group_to_nodes:
13881 group_to_nodes[node.group].append(node.name)
13882 node_to_group[node.name] = node.group
13885 all_instances = lu.cfg.GetAllInstancesInfo()
13886 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13888 for instance in all_instances.values():
13889 node = instance.primary_node
13890 if node in node_to_group:
13891 group_to_instances[node_to_group[node]].append(instance.name)
13894 # Do not pass on node information if it was not requested.
13895 group_to_nodes = None
13897 return query.GroupQueryData(self._cluster,
13898 [self._all_groups[uuid]
13899 for uuid in self.wanted],
13900 group_to_nodes, group_to_instances,
13901 query.GQ_DISKPARAMS in self.requested_data)
13904 class LUGroupQuery(NoHooksLU):
13905 """Logical unit for querying node groups.
13910 def CheckArguments(self):
13911 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13912 self.op.output_fields, False)
13914 def ExpandNames(self):
13915 self.gq.ExpandNames(self)
13917 def DeclareLocks(self, level):
13918 self.gq.DeclareLocks(self, level)
13920 def Exec(self, feedback_fn):
13921 return self.gq.OldStyleQuery(self)
13924 class LUGroupSetParams(LogicalUnit):
13925 """Modifies the parameters of a node group.
13928 HPATH = "group-modify"
13929 HTYPE = constants.HTYPE_GROUP
13932 def CheckArguments(self):
13935 self.op.diskparams,
13936 self.op.alloc_policy,
13938 self.op.disk_state,
13942 if all_changes.count(None) == len(all_changes):
13943 raise errors.OpPrereqError("Please pass at least one modification",
13944 errors.ECODE_INVAL)
13946 def ExpandNames(self):
13947 # This raises errors.OpPrereqError on its own:
13948 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13950 self.needed_locks = {
13951 locking.LEVEL_INSTANCE: [],
13952 locking.LEVEL_NODEGROUP: [self.group_uuid],
13955 self.share_locks[locking.LEVEL_INSTANCE] = 1
13957 def DeclareLocks(self, level):
13958 if level == locking.LEVEL_INSTANCE:
13959 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13961 # Lock instances optimistically, needs verification once group lock has
13963 self.needed_locks[locking.LEVEL_INSTANCE] = \
13964 self.cfg.GetNodeGroupInstances(self.group_uuid)
13967 def _UpdateAndVerifyDiskParams(old, new):
13968 """Updates and verifies disk parameters.
13971 new_params = _GetUpdatedParams(old, new)
13972 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13975 def CheckPrereq(self):
13976 """Check prerequisites.
13979 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13981 # Check if locked instances are still correct
13982 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13984 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13985 cluster = self.cfg.GetClusterInfo()
13987 if self.group is None:
13988 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13989 (self.op.group_name, self.group_uuid))
13991 if self.op.ndparams:
13992 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13993 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13994 self.new_ndparams = new_ndparams
13996 if self.op.diskparams:
13997 diskparams = self.group.diskparams
13998 uavdp = self._UpdateAndVerifyDiskParams
13999 # For each disktemplate subdict update and verify the values
14000 new_diskparams = dict((dt,
14001 uavdp(diskparams.get(dt, {}),
14002 self.op.diskparams[dt]))
14003 for dt in constants.DISK_TEMPLATES
14004 if dt in self.op.diskparams)
14005 # As we've all subdicts of diskparams ready, lets merge the actual
14006 # dict with all updated subdicts
14007 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14009 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14010 except errors.OpPrereqError, err:
14011 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14012 errors.ECODE_INVAL)
14014 if self.op.hv_state:
14015 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14016 self.group.hv_state_static)
14018 if self.op.disk_state:
14019 self.new_disk_state = \
14020 _MergeAndVerifyDiskState(self.op.disk_state,
14021 self.group.disk_state_static)
14023 if self.op.ipolicy:
14024 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14028 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14029 inst_filter = lambda inst: inst.name in owned_instances
14030 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14032 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14034 new_ipolicy, instances)
14037 self.LogWarning("After the ipolicy change the following instances"
14038 " violate them: %s",
14039 utils.CommaJoin(violations))
14041 def BuildHooksEnv(self):
14042 """Build hooks env.
14046 "GROUP_NAME": self.op.group_name,
14047 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14050 def BuildHooksNodes(self):
14051 """Build hooks nodes.
14054 mn = self.cfg.GetMasterNode()
14055 return ([mn], [mn])
14057 def Exec(self, feedback_fn):
14058 """Modifies the node group.
14063 if self.op.ndparams:
14064 self.group.ndparams = self.new_ndparams
14065 result.append(("ndparams", str(self.group.ndparams)))
14067 if self.op.diskparams:
14068 self.group.diskparams = self.new_diskparams
14069 result.append(("diskparams", str(self.group.diskparams)))
14071 if self.op.alloc_policy:
14072 self.group.alloc_policy = self.op.alloc_policy
14074 if self.op.hv_state:
14075 self.group.hv_state_static = self.new_hv_state
14077 if self.op.disk_state:
14078 self.group.disk_state_static = self.new_disk_state
14080 if self.op.ipolicy:
14081 self.group.ipolicy = self.new_ipolicy
14083 self.cfg.Update(self.group, feedback_fn)
14087 class LUGroupRemove(LogicalUnit):
14088 HPATH = "group-remove"
14089 HTYPE = constants.HTYPE_GROUP
14092 def ExpandNames(self):
14093 # This will raises errors.OpPrereqError on its own:
14094 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14095 self.needed_locks = {
14096 locking.LEVEL_NODEGROUP: [self.group_uuid],
14099 def CheckPrereq(self):
14100 """Check prerequisites.
14102 This checks that the given group name exists as a node group, that is
14103 empty (i.e., contains no nodes), and that is not the last group of the
14107 # Verify that the group is empty.
14108 group_nodes = [node.name
14109 for node in self.cfg.GetAllNodesInfo().values()
14110 if node.group == self.group_uuid]
14113 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14115 (self.op.group_name,
14116 utils.CommaJoin(utils.NiceSort(group_nodes))),
14117 errors.ECODE_STATE)
14119 # Verify the cluster would not be left group-less.
14120 if len(self.cfg.GetNodeGroupList()) == 1:
14121 raise errors.OpPrereqError("Group '%s' is the only group,"
14122 " cannot be removed" %
14123 self.op.group_name,
14124 errors.ECODE_STATE)
14126 def BuildHooksEnv(self):
14127 """Build hooks env.
14131 "GROUP_NAME": self.op.group_name,
14134 def BuildHooksNodes(self):
14135 """Build hooks nodes.
14138 mn = self.cfg.GetMasterNode()
14139 return ([mn], [mn])
14141 def Exec(self, feedback_fn):
14142 """Remove the node group.
14146 self.cfg.RemoveNodeGroup(self.group_uuid)
14147 except errors.ConfigurationError:
14148 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14149 (self.op.group_name, self.group_uuid))
14151 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14154 class LUGroupRename(LogicalUnit):
14155 HPATH = "group-rename"
14156 HTYPE = constants.HTYPE_GROUP
14159 def ExpandNames(self):
14160 # This raises errors.OpPrereqError on its own:
14161 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14163 self.needed_locks = {
14164 locking.LEVEL_NODEGROUP: [self.group_uuid],
14167 def CheckPrereq(self):
14168 """Check prerequisites.
14170 Ensures requested new name is not yet used.
14174 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14175 except errors.OpPrereqError:
14178 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14179 " node group (UUID: %s)" %
14180 (self.op.new_name, new_name_uuid),
14181 errors.ECODE_EXISTS)
14183 def BuildHooksEnv(self):
14184 """Build hooks env.
14188 "OLD_NAME": self.op.group_name,
14189 "NEW_NAME": self.op.new_name,
14192 def BuildHooksNodes(self):
14193 """Build hooks nodes.
14196 mn = self.cfg.GetMasterNode()
14198 all_nodes = self.cfg.GetAllNodesInfo()
14199 all_nodes.pop(mn, None)
14202 run_nodes.extend(node.name for node in all_nodes.values()
14203 if node.group == self.group_uuid)
14205 return (run_nodes, run_nodes)
14207 def Exec(self, feedback_fn):
14208 """Rename the node group.
14211 group = self.cfg.GetNodeGroup(self.group_uuid)
14214 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14215 (self.op.group_name, self.group_uuid))
14217 group.name = self.op.new_name
14218 self.cfg.Update(group, feedback_fn)
14220 return self.op.new_name
14223 class LUGroupEvacuate(LogicalUnit):
14224 HPATH = "group-evacuate"
14225 HTYPE = constants.HTYPE_GROUP
14228 def ExpandNames(self):
14229 # This raises errors.OpPrereqError on its own:
14230 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14232 if self.op.target_groups:
14233 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14234 self.op.target_groups)
14236 self.req_target_uuids = []
14238 if self.group_uuid in self.req_target_uuids:
14239 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14240 " as a target group (targets are %s)" %
14242 utils.CommaJoin(self.req_target_uuids)),
14243 errors.ECODE_INVAL)
14245 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14247 self.share_locks = _ShareAll()
14248 self.needed_locks = {
14249 locking.LEVEL_INSTANCE: [],
14250 locking.LEVEL_NODEGROUP: [],
14251 locking.LEVEL_NODE: [],
14254 def DeclareLocks(self, level):
14255 if level == locking.LEVEL_INSTANCE:
14256 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14258 # Lock instances optimistically, needs verification once node and group
14259 # locks have been acquired
14260 self.needed_locks[locking.LEVEL_INSTANCE] = \
14261 self.cfg.GetNodeGroupInstances(self.group_uuid)
14263 elif level == locking.LEVEL_NODEGROUP:
14264 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14266 if self.req_target_uuids:
14267 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14269 # Lock all groups used by instances optimistically; this requires going
14270 # via the node before it's locked, requiring verification later on
14271 lock_groups.update(group_uuid
14272 for instance_name in
14273 self.owned_locks(locking.LEVEL_INSTANCE)
14275 self.cfg.GetInstanceNodeGroups(instance_name))
14277 # No target groups, need to lock all of them
14278 lock_groups = locking.ALL_SET
14280 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14282 elif level == locking.LEVEL_NODE:
14283 # This will only lock the nodes in the group to be evacuated which
14284 # contain actual instances
14285 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14286 self._LockInstancesNodes()
14288 # Lock all nodes in group to be evacuated and target groups
14289 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14290 assert self.group_uuid in owned_groups
14291 member_nodes = [node_name
14292 for group in owned_groups
14293 for node_name in self.cfg.GetNodeGroup(group).members]
14294 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14296 def CheckPrereq(self):
14297 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14298 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14299 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14301 assert owned_groups.issuperset(self.req_target_uuids)
14302 assert self.group_uuid in owned_groups
14304 # Check if locked instances are still correct
14305 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14307 # Get instance information
14308 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14310 # Check if node groups for locked instances are still correct
14311 _CheckInstancesNodeGroups(self.cfg, self.instances,
14312 owned_groups, owned_nodes, self.group_uuid)
14314 if self.req_target_uuids:
14315 # User requested specific target groups
14316 self.target_uuids = self.req_target_uuids
14318 # All groups except the one to be evacuated are potential targets
14319 self.target_uuids = [group_uuid for group_uuid in owned_groups
14320 if group_uuid != self.group_uuid]
14322 if not self.target_uuids:
14323 raise errors.OpPrereqError("There are no possible target groups",
14324 errors.ECODE_INVAL)
14326 def BuildHooksEnv(self):
14327 """Build hooks env.
14331 "GROUP_NAME": self.op.group_name,
14332 "TARGET_GROUPS": " ".join(self.target_uuids),
14335 def BuildHooksNodes(self):
14336 """Build hooks nodes.
14339 mn = self.cfg.GetMasterNode()
14341 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14343 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14345 return (run_nodes, run_nodes)
14347 def Exec(self, feedback_fn):
14348 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14350 assert self.group_uuid not in self.target_uuids
14352 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14353 instances=instances, target_groups=self.target_uuids)
14355 ial.Run(self.op.iallocator)
14357 if not ial.success:
14358 raise errors.OpPrereqError("Can't compute group evacuation using"
14359 " iallocator '%s': %s" %
14360 (self.op.iallocator, ial.info),
14361 errors.ECODE_NORES)
14363 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14365 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14366 len(jobs), self.op.group_name)
14368 return ResultWithJobs(jobs)
14371 class TagsLU(NoHooksLU): # pylint: disable=W0223
14372 """Generic tags LU.
14374 This is an abstract class which is the parent of all the other tags LUs.
14377 def ExpandNames(self):
14378 self.group_uuid = None
14379 self.needed_locks = {}
14381 if self.op.kind == constants.TAG_NODE:
14382 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14383 lock_level = locking.LEVEL_NODE
14384 lock_name = self.op.name
14385 elif self.op.kind == constants.TAG_INSTANCE:
14386 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14387 lock_level = locking.LEVEL_INSTANCE
14388 lock_name = self.op.name
14389 elif self.op.kind == constants.TAG_NODEGROUP:
14390 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14391 lock_level = locking.LEVEL_NODEGROUP
14392 lock_name = self.group_uuid
14397 if lock_level and getattr(self.op, "use_locking", True):
14398 self.needed_locks[lock_level] = lock_name
14400 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14401 # not possible to acquire the BGL based on opcode parameters)
14403 def CheckPrereq(self):
14404 """Check prerequisites.
14407 if self.op.kind == constants.TAG_CLUSTER:
14408 self.target = self.cfg.GetClusterInfo()
14409 elif self.op.kind == constants.TAG_NODE:
14410 self.target = self.cfg.GetNodeInfo(self.op.name)
14411 elif self.op.kind == constants.TAG_INSTANCE:
14412 self.target = self.cfg.GetInstanceInfo(self.op.name)
14413 elif self.op.kind == constants.TAG_NODEGROUP:
14414 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14416 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14417 str(self.op.kind), errors.ECODE_INVAL)
14420 class LUTagsGet(TagsLU):
14421 """Returns the tags of a given object.
14426 def ExpandNames(self):
14427 TagsLU.ExpandNames(self)
14429 # Share locks as this is only a read operation
14430 self.share_locks = _ShareAll()
14432 def Exec(self, feedback_fn):
14433 """Returns the tag list.
14436 return list(self.target.GetTags())
14439 class LUTagsSearch(NoHooksLU):
14440 """Searches the tags for a given pattern.
14445 def ExpandNames(self):
14446 self.needed_locks = {}
14448 def CheckPrereq(self):
14449 """Check prerequisites.
14451 This checks the pattern passed for validity by compiling it.
14455 self.re = re.compile(self.op.pattern)
14456 except re.error, err:
14457 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14458 (self.op.pattern, err), errors.ECODE_INVAL)
14460 def Exec(self, feedback_fn):
14461 """Returns the tag list.
14465 tgts = [("/cluster", cfg.GetClusterInfo())]
14466 ilist = cfg.GetAllInstancesInfo().values()
14467 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14468 nlist = cfg.GetAllNodesInfo().values()
14469 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14470 tgts.extend(("/nodegroup/%s" % n.name, n)
14471 for n in cfg.GetAllNodeGroupsInfo().values())
14473 for path, target in tgts:
14474 for tag in target.GetTags():
14475 if self.re.search(tag):
14476 results.append((path, tag))
14480 class LUTagsSet(TagsLU):
14481 """Sets a tag on a given object.
14486 def CheckPrereq(self):
14487 """Check prerequisites.
14489 This checks the type and length of the tag name and value.
14492 TagsLU.CheckPrereq(self)
14493 for tag in self.op.tags:
14494 objects.TaggableObject.ValidateTag(tag)
14496 def Exec(self, feedback_fn):
14501 for tag in self.op.tags:
14502 self.target.AddTag(tag)
14503 except errors.TagError, err:
14504 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14505 self.cfg.Update(self.target, feedback_fn)
14508 class LUTagsDel(TagsLU):
14509 """Delete a list of tags from a given object.
14514 def CheckPrereq(self):
14515 """Check prerequisites.
14517 This checks that we have the given tag.
14520 TagsLU.CheckPrereq(self)
14521 for tag in self.op.tags:
14522 objects.TaggableObject.ValidateTag(tag)
14523 del_tags = frozenset(self.op.tags)
14524 cur_tags = self.target.GetTags()
14526 diff_tags = del_tags - cur_tags
14528 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14529 raise errors.OpPrereqError("Tag(s) %s not found" %
14530 (utils.CommaJoin(diff_names), ),
14531 errors.ECODE_NOENT)
14533 def Exec(self, feedback_fn):
14534 """Remove the tag from the object.
14537 for tag in self.op.tags:
14538 self.target.RemoveTag(tag)
14539 self.cfg.Update(self.target, feedback_fn)
14542 class LUTestDelay(NoHooksLU):
14543 """Sleep for a specified amount of time.
14545 This LU sleeps on the master and/or nodes for a specified amount of
14551 def ExpandNames(self):
14552 """Expand names and set required locks.
14554 This expands the node list, if any.
14557 self.needed_locks = {}
14558 if self.op.on_nodes:
14559 # _GetWantedNodes can be used here, but is not always appropriate to use
14560 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14561 # more information.
14562 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14563 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14565 def _TestDelay(self):
14566 """Do the actual sleep.
14569 if self.op.on_master:
14570 if not utils.TestDelay(self.op.duration):
14571 raise errors.OpExecError("Error during master delay test")
14572 if self.op.on_nodes:
14573 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14574 for node, node_result in result.items():
14575 node_result.Raise("Failure during rpc call to node %s" % node)
14577 def Exec(self, feedback_fn):
14578 """Execute the test delay opcode, with the wanted repetitions.
14581 if self.op.repeat == 0:
14584 top_value = self.op.repeat - 1
14585 for i in range(self.op.repeat):
14586 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14590 class LUTestJqueue(NoHooksLU):
14591 """Utility LU to test some aspects of the job queue.
14596 # Must be lower than default timeout for WaitForJobChange to see whether it
14597 # notices changed jobs
14598 _CLIENT_CONNECT_TIMEOUT = 20.0
14599 _CLIENT_CONFIRM_TIMEOUT = 60.0
14602 def _NotifyUsingSocket(cls, cb, errcls):
14603 """Opens a Unix socket and waits for another program to connect.
14606 @param cb: Callback to send socket name to client
14607 @type errcls: class
14608 @param errcls: Exception class to use for errors
14611 # Using a temporary directory as there's no easy way to create temporary
14612 # sockets without writing a custom loop around tempfile.mktemp and
14614 tmpdir = tempfile.mkdtemp()
14616 tmpsock = utils.PathJoin(tmpdir, "sock")
14618 logging.debug("Creating temporary socket at %s", tmpsock)
14619 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14624 # Send details to client
14627 # Wait for client to connect before continuing
14628 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14630 (conn, _) = sock.accept()
14631 except socket.error, err:
14632 raise errcls("Client didn't connect in time (%s)" % err)
14636 # Remove as soon as client is connected
14637 shutil.rmtree(tmpdir)
14639 # Wait for client to close
14642 # pylint: disable=E1101
14643 # Instance of '_socketobject' has no ... member
14644 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14646 except socket.error, err:
14647 raise errcls("Client failed to confirm notification (%s)" % err)
14651 def _SendNotification(self, test, arg, sockname):
14652 """Sends a notification to the client.
14655 @param test: Test name
14656 @param arg: Test argument (depends on test)
14657 @type sockname: string
14658 @param sockname: Socket path
14661 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14663 def _Notify(self, prereq, test, arg):
14664 """Notifies the client of a test.
14667 @param prereq: Whether this is a prereq-phase test
14669 @param test: Test name
14670 @param arg: Test argument (depends on test)
14674 errcls = errors.OpPrereqError
14676 errcls = errors.OpExecError
14678 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14682 def CheckArguments(self):
14683 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14684 self.expandnames_calls = 0
14686 def ExpandNames(self):
14687 checkargs_calls = getattr(self, "checkargs_calls", 0)
14688 if checkargs_calls < 1:
14689 raise errors.ProgrammerError("CheckArguments was not called")
14691 self.expandnames_calls += 1
14693 if self.op.notify_waitlock:
14694 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14696 self.LogInfo("Expanding names")
14698 # Get lock on master node (just to get a lock, not for a particular reason)
14699 self.needed_locks = {
14700 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14703 def Exec(self, feedback_fn):
14704 if self.expandnames_calls < 1:
14705 raise errors.ProgrammerError("ExpandNames was not called")
14707 if self.op.notify_exec:
14708 self._Notify(False, constants.JQT_EXEC, None)
14710 self.LogInfo("Executing")
14712 if self.op.log_messages:
14713 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14714 for idx, msg in enumerate(self.op.log_messages):
14715 self.LogInfo("Sending log message %s", idx + 1)
14716 feedback_fn(constants.JQT_MSGPREFIX + msg)
14717 # Report how many test messages have been sent
14718 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14721 raise errors.OpExecError("Opcode failure was requested")
14726 class IAllocator(object):
14727 """IAllocator framework.
14729 An IAllocator instance has three sets of attributes:
14730 - cfg that is needed to query the cluster
14731 - input data (all members of the _KEYS class attribute are required)
14732 - four buffer attributes (in|out_data|text), that represent the
14733 input (to the external script) in text and data structure format,
14734 and the output from it, again in two formats
14735 - the result variables from the script (success, info, nodes) for
14739 # pylint: disable=R0902
14740 # lots of instance attributes
14742 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14744 self.rpc = rpc_runner
14745 # init buffer variables
14746 self.in_text = self.out_text = self.in_data = self.out_data = None
14747 # init all input fields so that pylint is happy
14749 self.memory = self.disks = self.disk_template = self.spindle_use = None
14750 self.os = self.tags = self.nics = self.vcpus = None
14751 self.hypervisor = None
14752 self.relocate_from = None
14754 self.instances = None
14755 self.evac_mode = None
14756 self.target_groups = []
14758 self.required_nodes = None
14759 # init result fields
14760 self.success = self.info = self.result = None
14763 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14765 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14766 " IAllocator" % self.mode)
14768 keyset = [n for (n, _) in keydata]
14771 if key not in keyset:
14772 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14773 " IAllocator" % key)
14774 setattr(self, key, kwargs[key])
14777 if key not in kwargs:
14778 raise errors.ProgrammerError("Missing input parameter '%s' to"
14779 " IAllocator" % key)
14780 self._BuildInputData(compat.partial(fn, self), keydata)
14782 def _ComputeClusterData(self):
14783 """Compute the generic allocator input data.
14785 This is the data that is independent of the actual operation.
14789 cluster_info = cfg.GetClusterInfo()
14792 "version": constants.IALLOCATOR_VERSION,
14793 "cluster_name": cfg.GetClusterName(),
14794 "cluster_tags": list(cluster_info.GetTags()),
14795 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14796 "ipolicy": cluster_info.ipolicy,
14798 ninfo = cfg.GetAllNodesInfo()
14799 iinfo = cfg.GetAllInstancesInfo().values()
14800 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14803 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14805 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14806 hypervisor_name = self.hypervisor
14807 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14808 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14810 hypervisor_name = cluster_info.primary_hypervisor
14812 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14815 self.rpc.call_all_instances_info(node_list,
14816 cluster_info.enabled_hypervisors)
14818 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14820 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14821 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14822 i_list, config_ndata)
14823 assert len(data["nodes"]) == len(ninfo), \
14824 "Incomplete node data computed"
14826 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14828 self.in_data = data
14831 def _ComputeNodeGroupData(cfg):
14832 """Compute node groups data.
14835 cluster = cfg.GetClusterInfo()
14836 ng = dict((guuid, {
14837 "name": gdata.name,
14838 "alloc_policy": gdata.alloc_policy,
14839 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14841 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14846 def _ComputeBasicNodeData(cfg, node_cfg):
14847 """Compute global node data.
14850 @returns: a dict of name: (node dict, node config)
14853 # fill in static (config-based) values
14854 node_results = dict((ninfo.name, {
14855 "tags": list(ninfo.GetTags()),
14856 "primary_ip": ninfo.primary_ip,
14857 "secondary_ip": ninfo.secondary_ip,
14858 "offline": ninfo.offline,
14859 "drained": ninfo.drained,
14860 "master_candidate": ninfo.master_candidate,
14861 "group": ninfo.group,
14862 "master_capable": ninfo.master_capable,
14863 "vm_capable": ninfo.vm_capable,
14864 "ndparams": cfg.GetNdParams(ninfo),
14866 for ninfo in node_cfg.values())
14868 return node_results
14871 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14873 """Compute global node data.
14875 @param node_results: the basic node structures as filled from the config
14878 #TODO(dynmem): compute the right data on MAX and MIN memory
14879 # make a copy of the current dict
14880 node_results = dict(node_results)
14881 for nname, nresult in node_data.items():
14882 assert nname in node_results, "Missing basic data for node %s" % nname
14883 ninfo = node_cfg[nname]
14885 if not (ninfo.offline or ninfo.drained):
14886 nresult.Raise("Can't get data for node %s" % nname)
14887 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14889 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14891 for attr in ["memory_total", "memory_free", "memory_dom0",
14892 "vg_size", "vg_free", "cpu_total"]:
14893 if attr not in remote_info:
14894 raise errors.OpExecError("Node '%s' didn't return attribute"
14895 " '%s'" % (nname, attr))
14896 if not isinstance(remote_info[attr], int):
14897 raise errors.OpExecError("Node '%s' returned invalid value"
14899 (nname, attr, remote_info[attr]))
14900 # compute memory used by primary instances
14901 i_p_mem = i_p_up_mem = 0
14902 for iinfo, beinfo in i_list:
14903 if iinfo.primary_node == nname:
14904 i_p_mem += beinfo[constants.BE_MAXMEM]
14905 if iinfo.name not in node_iinfo[nname].payload:
14908 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14909 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14910 remote_info["memory_free"] -= max(0, i_mem_diff)
14912 if iinfo.admin_state == constants.ADMINST_UP:
14913 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14915 # compute memory used by instances
14917 "total_memory": remote_info["memory_total"],
14918 "reserved_memory": remote_info["memory_dom0"],
14919 "free_memory": remote_info["memory_free"],
14920 "total_disk": remote_info["vg_size"],
14921 "free_disk": remote_info["vg_free"],
14922 "total_cpus": remote_info["cpu_total"],
14923 "i_pri_memory": i_p_mem,
14924 "i_pri_up_memory": i_p_up_mem,
14926 pnr_dyn.update(node_results[nname])
14927 node_results[nname] = pnr_dyn
14929 return node_results
14932 def _ComputeInstanceData(cluster_info, i_list):
14933 """Compute global instance data.
14937 for iinfo, beinfo in i_list:
14939 for nic in iinfo.nics:
14940 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14944 "mode": filled_params[constants.NIC_MODE],
14945 "link": filled_params[constants.NIC_LINK],
14947 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14948 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14949 nic_data.append(nic_dict)
14951 "tags": list(iinfo.GetTags()),
14952 "admin_state": iinfo.admin_state,
14953 "vcpus": beinfo[constants.BE_VCPUS],
14954 "memory": beinfo[constants.BE_MAXMEM],
14955 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14957 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14959 "disks": [{constants.IDISK_SIZE: dsk.size,
14960 constants.IDISK_MODE: dsk.mode}
14961 for dsk in iinfo.disks],
14962 "disk_template": iinfo.disk_template,
14963 "hypervisor": iinfo.hypervisor,
14965 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14967 instance_data[iinfo.name] = pir
14969 return instance_data
14971 def _AddNewInstance(self):
14972 """Add new instance data to allocator structure.
14974 This in combination with _AllocatorGetClusterData will create the
14975 correct structure needed as input for the allocator.
14977 The checks for the completeness of the opcode must have already been
14981 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14983 if self.disk_template in constants.DTS_INT_MIRROR:
14984 self.required_nodes = 2
14986 self.required_nodes = 1
14990 "disk_template": self.disk_template,
14993 "vcpus": self.vcpus,
14994 "memory": self.memory,
14995 "spindle_use": self.spindle_use,
14996 "disks": self.disks,
14997 "disk_space_total": disk_space,
14999 "required_nodes": self.required_nodes,
15000 "hypervisor": self.hypervisor,
15005 def _AddRelocateInstance(self):
15006 """Add relocate instance data to allocator structure.
15008 This in combination with _IAllocatorGetClusterData will create the
15009 correct structure needed as input for the allocator.
15011 The checks for the completeness of the opcode must have already been
15015 instance = self.cfg.GetInstanceInfo(self.name)
15016 if instance is None:
15017 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15018 " IAllocator" % self.name)
15020 if instance.disk_template not in constants.DTS_MIRRORED:
15021 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15022 errors.ECODE_INVAL)
15024 if instance.disk_template in constants.DTS_INT_MIRROR and \
15025 len(instance.secondary_nodes) != 1:
15026 raise errors.OpPrereqError("Instance has not exactly one secondary node",
15027 errors.ECODE_STATE)
15029 self.required_nodes = 1
15030 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15031 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15035 "disk_space_total": disk_space,
15036 "required_nodes": self.required_nodes,
15037 "relocate_from": self.relocate_from,
15041 def _AddNodeEvacuate(self):
15042 """Get data for node-evacuate requests.
15046 "instances": self.instances,
15047 "evac_mode": self.evac_mode,
15050 def _AddChangeGroup(self):
15051 """Get data for node-evacuate requests.
15055 "instances": self.instances,
15056 "target_groups": self.target_groups,
15059 def _BuildInputData(self, fn, keydata):
15060 """Build input data structures.
15063 self._ComputeClusterData()
15066 request["type"] = self.mode
15067 for keyname, keytype in keydata:
15068 if keyname not in request:
15069 raise errors.ProgrammerError("Request parameter %s is missing" %
15071 val = request[keyname]
15072 if not keytype(val):
15073 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15074 " validation, value %s, expected"
15075 " type %s" % (keyname, val, keytype))
15076 self.in_data["request"] = request
15078 self.in_text = serializer.Dump(self.in_data)
15080 _STRING_LIST = ht.TListOf(ht.TString)
15081 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15082 # pylint: disable=E1101
15083 # Class '...' has no 'OP_ID' member
15084 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15085 opcodes.OpInstanceMigrate.OP_ID,
15086 opcodes.OpInstanceReplaceDisks.OP_ID])
15090 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15091 ht.TItems([ht.TNonEmptyString,
15092 ht.TNonEmptyString,
15093 ht.TListOf(ht.TNonEmptyString),
15096 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15097 ht.TItems([ht.TNonEmptyString,
15100 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15101 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
15104 constants.IALLOCATOR_MODE_ALLOC:
15107 ("name", ht.TString),
15108 ("memory", ht.TInt),
15109 ("spindle_use", ht.TInt),
15110 ("disks", ht.TListOf(ht.TDict)),
15111 ("disk_template", ht.TString),
15112 ("os", ht.TString),
15113 ("tags", _STRING_LIST),
15114 ("nics", ht.TListOf(ht.TDict)),
15115 ("vcpus", ht.TInt),
15116 ("hypervisor", ht.TString),
15118 constants.IALLOCATOR_MODE_RELOC:
15119 (_AddRelocateInstance,
15120 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15122 constants.IALLOCATOR_MODE_NODE_EVAC:
15123 (_AddNodeEvacuate, [
15124 ("instances", _STRING_LIST),
15125 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15127 constants.IALLOCATOR_MODE_CHG_GROUP:
15128 (_AddChangeGroup, [
15129 ("instances", _STRING_LIST),
15130 ("target_groups", _STRING_LIST),
15134 def Run(self, name, validate=True, call_fn=None):
15135 """Run an instance allocator and return the results.
15138 if call_fn is None:
15139 call_fn = self.rpc.call_iallocator_runner
15141 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15142 result.Raise("Failure while running the iallocator script")
15144 self.out_text = result.payload
15146 self._ValidateResult()
15148 def _ValidateResult(self):
15149 """Process the allocator results.
15151 This will process and if successful save the result in
15152 self.out_data and the other parameters.
15156 rdict = serializer.Load(self.out_text)
15157 except Exception, err:
15158 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15160 if not isinstance(rdict, dict):
15161 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15163 # TODO: remove backwards compatiblity in later versions
15164 if "nodes" in rdict and "result" not in rdict:
15165 rdict["result"] = rdict["nodes"]
15168 for key in "success", "info", "result":
15169 if key not in rdict:
15170 raise errors.OpExecError("Can't parse iallocator results:"
15171 " missing key '%s'" % key)
15172 setattr(self, key, rdict[key])
15174 if not self._result_check(self.result):
15175 raise errors.OpExecError("Iallocator returned invalid result,"
15176 " expected %s, got %s" %
15177 (self._result_check, self.result),
15178 errors.ECODE_INVAL)
15180 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15181 assert self.relocate_from is not None
15182 assert self.required_nodes == 1
15184 node2group = dict((name, ndata["group"])
15185 for (name, ndata) in self.in_data["nodes"].items())
15187 fn = compat.partial(self._NodesToGroups, node2group,
15188 self.in_data["nodegroups"])
15190 instance = self.cfg.GetInstanceInfo(self.name)
15191 request_groups = fn(self.relocate_from + [instance.primary_node])
15192 result_groups = fn(rdict["result"] + [instance.primary_node])
15194 if self.success and not set(result_groups).issubset(request_groups):
15195 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15196 " differ from original groups (%s)" %
15197 (utils.CommaJoin(result_groups),
15198 utils.CommaJoin(request_groups)))
15200 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15201 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15203 self.out_data = rdict
15206 def _NodesToGroups(node2group, groups, nodes):
15207 """Returns a list of unique group names for a list of nodes.
15209 @type node2group: dict
15210 @param node2group: Map from node name to group UUID
15212 @param groups: Group information
15214 @param nodes: Node names
15221 group_uuid = node2group[node]
15223 # Ignore unknown node
15227 group = groups[group_uuid]
15229 # Can't find group, let's use UUID
15230 group_name = group_uuid
15232 group_name = group["name"]
15234 result.add(group_name)
15236 return sorted(result)
15239 class LUTestAllocator(NoHooksLU):
15240 """Run allocator tests.
15242 This LU runs the allocator tests
15245 def CheckPrereq(self):
15246 """Check prerequisites.
15248 This checks the opcode parameters depending on the director and mode test.
15251 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15252 for attr in ["memory", "disks", "disk_template",
15253 "os", "tags", "nics", "vcpus"]:
15254 if not hasattr(self.op, attr):
15255 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15256 attr, errors.ECODE_INVAL)
15257 iname = self.cfg.ExpandInstanceName(self.op.name)
15258 if iname is not None:
15259 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15260 iname, errors.ECODE_EXISTS)
15261 if not isinstance(self.op.nics, list):
15262 raise errors.OpPrereqError("Invalid parameter 'nics'",
15263 errors.ECODE_INVAL)
15264 if not isinstance(self.op.disks, list):
15265 raise errors.OpPrereqError("Invalid parameter 'disks'",
15266 errors.ECODE_INVAL)
15267 for row in self.op.disks:
15268 if (not isinstance(row, dict) or
15269 constants.IDISK_SIZE not in row or
15270 not isinstance(row[constants.IDISK_SIZE], int) or
15271 constants.IDISK_MODE not in row or
15272 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15273 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15274 " parameter", errors.ECODE_INVAL)
15275 if self.op.hypervisor is None:
15276 self.op.hypervisor = self.cfg.GetHypervisorType()
15277 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15278 fname = _ExpandInstanceName(self.cfg, self.op.name)
15279 self.op.name = fname
15280 self.relocate_from = \
15281 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15282 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15283 constants.IALLOCATOR_MODE_NODE_EVAC):
15284 if not self.op.instances:
15285 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15286 self.op.instances = _GetWantedInstances(self, self.op.instances)
15288 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15289 self.op.mode, errors.ECODE_INVAL)
15291 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15292 if self.op.allocator is None:
15293 raise errors.OpPrereqError("Missing allocator name",
15294 errors.ECODE_INVAL)
15295 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15296 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15297 self.op.direction, errors.ECODE_INVAL)
15299 def Exec(self, feedback_fn):
15300 """Run the allocator test.
15303 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15304 ial = IAllocator(self.cfg, self.rpc,
15307 memory=self.op.memory,
15308 disks=self.op.disks,
15309 disk_template=self.op.disk_template,
15313 vcpus=self.op.vcpus,
15314 hypervisor=self.op.hypervisor,
15316 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15317 ial = IAllocator(self.cfg, self.rpc,
15320 relocate_from=list(self.relocate_from),
15322 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15323 ial = IAllocator(self.cfg, self.rpc,
15325 instances=self.op.instances,
15326 target_groups=self.op.target_groups)
15327 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15328 ial = IAllocator(self.cfg, self.rpc,
15330 instances=self.op.instances,
15331 evac_mode=self.op.evac_mode)
15333 raise errors.ProgrammerError("Uncatched mode %s in"
15334 " LUTestAllocator.Exec", self.op.mode)
15336 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15337 result = ial.in_text
15339 ial.Run(self.op.allocator, validate=False)
15340 result = ial.out_text
15344 #: Query type implementations
15346 constants.QR_CLUSTER: _ClusterQuery,
15347 constants.QR_INSTANCE: _InstanceQuery,
15348 constants.QR_NODE: _NodeQuery,
15349 constants.QR_GROUP: _GroupQuery,
15350 constants.QR_OS: _OsQuery,
15351 constants.QR_EXPORT: _ExportQuery,
15354 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15357 def _GetQueryImplementation(name):
15358 """Returns the implemtnation for a query type.
15360 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15364 return _QUERY_IMPL[name]
15366 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15367 errors.ECODE_INVAL)