# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

from ganeti import rpc
from ganeti import runtime

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
_DRBD_META_SIZE = 128

INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))


class ResultWithJobs(object):
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs


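# Illustrative only (a sketch, not part of the original module): an LU that
# wants follow-up jobs submitted on its behalf can return ResultWithJobs
# from Exec; "op_a"/"op_b" below stand for hypothetical opcode instances.
#
#   def Exec(self, feedback_fn):
#     ...
#     return ResultWithJobs([[op_a], [op_b]], result_key="value")

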
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm

    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}

    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    #
    # self.needed_locks = {}  # Exclusive LUs don't need locks.
    raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]


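# A sketch of how the two helpers above are typically combined (assumed
# usage, not part of the original module): an LU locks one instance in
# ExpandNames and then resolves that instance's node locks in DeclareLocks.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()

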
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet(object):
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase(object):
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  #: Field to sort by
  SORT_FIELD = "name"

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield=self.SORT_FIELD)
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })


def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @return: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())

  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


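# Worked example for _GetUpdatedParams (illustrative only; the keys below
# are made up):
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   upd = {"root_path": constants.VALUE_DEFAULT, "serial_console": True}
#   _GetUpdatedParams(old, upd)
#   => {"kernel_path": "/boot/vmlinuz", "serial_console": True}

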
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @return: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))

  return ret


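# Worked example (illustrative; assumes a type_check dict that accepts the
# keys shown):
#
#   base    = {"xen-pvm": {"root_path": "/dev/xvda1"}}
#   updates = {"xen-pvm": {"root_path": constants.VALUE_DEFAULT},
#              "kvm": {"acpi": True}}
#   _UpdateAndVerifySubDict(base, updates, type_check)
#   => {"xen-pvm": {}, "kvm": {"acpi": True}}

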
def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass
  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"


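# Example call (a sketch): after an LU has narrowed its work down to a
# single node it can drop every other node lock it still holds.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])

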
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as
      value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = static
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)


def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None


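# Worked example (illustrative values, schematic ipolicy): with
# ipolicy[constants.ISPECS_MIN]["disk-size"] == 1024 and
# ipolicy[constants.ISPECS_MAX]["disk-size"] == 4096:
#
#   _ComputeMinMaxSpec(constants.ISPEC_DISK_SIZE, "0", ipolicy, 512)
#   => "disk-size/0 value 512 is not in range [1024, 4096]"
#   _ComputeMinMaxSpec(constants.ISPEC_DISK_SIZE, "0", ipolicy, 2048)
#   => None

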
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
                                         _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
      did not before

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


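# Example (sketch): callers pass a possibly-shortened name and get back the
# canonical one, or OpPrereqError if it cannot be resolved.
#
#   self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

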
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


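# For a single-NIC, single-disk instance the resulting environment looks
# roughly like this (sketch, values invented):
#
#   {"OP_TARGET": "inst1.example.com",
#    "INSTANCE_NAME": "inst1.example.com",
#    "INSTANCE_PRIMARY": "node1.example.com",
#    "INSTANCE_NIC_COUNT": 1,
#    "INSTANCE_NIC0_MODE": "bridged",
#    "INSTANCE_DISK_COUNT": 1,
#    "INSTANCE_DISK0_SIZE": 10240,
#    ...}

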
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.

  """
  return cluster.SimpleFillIPolicy(group.ipolicy)


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


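# Typical call from an LU's CheckArguments (a sketch; the slot names are
# examples only):
#
#   _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

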
def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
      apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values into a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


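# The returned list has one entry per parameter source, e.g. (illustrative):
#
#   [("cluster", "xen-pvm", {...}),
#    ("os debian-edgy", "xen-pvm", {...}),
#    ("instance inst1.example.com", "kvm", {...})]

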
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item is not None:
        item = " " + str(item)
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond


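# Typical use inside a verification LU's Exec (a sketch; the error code and
# message are examples only):
#
#   self._ErrorIf(test, constants.CV_ENODEHV, node,
#                 "hypervisor verify failure: '%s'", hv_result)

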
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              ignore_errors=self.op.ignore_errors,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


1941 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1942 """Verifies the cluster config.
1947 def _VerifyHVP(self, hvp_data):
1948 """Verifies locally the syntax of the hypervisor parameters.
1951 for item, hv_name, hv_params in hvp_data:
1952 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1955 hv_class = hypervisor.GetHypervisor(hv_name)
1956 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1957 hv_class.CheckParameterSyntax(hv_params)
1958 except errors.GenericError, err:
1959 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1961 def ExpandNames(self):
1962 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1963 self.share_locks = _ShareAll()
1965 def CheckPrereq(self):
1966 """Check prerequisites.
1969 # Retrieve all information
1970 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1971 self.all_node_info = self.cfg.GetAllNodesInfo()
1972 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1974 def Exec(self, feedback_fn):
1975 """Verify integrity of cluster, performing various test on nodes.
1979 self._feedback_fn = feedback_fn
1981 feedback_fn("* Verifying cluster config")
1983 for msg in self.cfg.VerifyConfig():
1984 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1986 feedback_fn("* Verifying cluster certificate files")
1988 for cert_filename in constants.ALL_CERT_FILES:
1989 (errcode, msg) = _VerifyCertificate(cert_filename)
1990 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1992 feedback_fn("* Verifying hypervisor parameters")
1994 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1995 self.all_inst_info.values()))
1997 feedback_fn("* Verifying all nodes belong to an existing group")
1999 # We do this verification here because, should this bogus circumstance
2000 # occur, it would never be caught by VerifyGroup, which only acts on
2001 # nodes/instances reachable from existing node groups.
2003 dangling_nodes = set(node.name for node in self.all_node_info.values()
2004 if node.group not in self.all_group_info)
2006 dangling_instances = {}
2007 no_node_instances = []
2009 for inst in self.all_inst_info.values():
2010 if inst.primary_node in dangling_nodes:
2011 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2012 elif inst.primary_node not in self.all_node_info:
2013 no_node_instances.append(inst.name)
    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non-"
                  "existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))
2035 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2036 """Verifies the status of a node group.
2039 HPATH = "cluster-verify"
2040 HTYPE = constants.HTYPE_CLUSTER
2043 _HOOKS_INDENT_RE = re.compile("^", re.M)
2045 class NodeImage(object):
2046 """A class representing the logical and physical status of a node.
2049 @ivar name: the node name to which this object refers
2050 @ivar volumes: a structure as returned from
2051 L{ganeti.backend.GetVolumeList} (runtime)
2052 @ivar instances: a list of running instances (runtime)
2053 @ivar pinst: list of configured primary instances (config)
2054 @ivar sinst: list of configured secondary instances (config)
2055 @ivar sbp: dictionary of {primary-node: list of instances} for all
2056 instances for which this node is secondary (config)
2057 @ivar mfree: free memory, as reported by hypervisor (runtime)
2058 @ivar dfree: free disk, as reported by the node (runtime)
2059 @ivar offline: the offline status (config)
2060 @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
2063 @type lvm_fail: boolean
2064 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2065 @type hyp_fail: boolean
2066 @ivar hyp_fail: whether the RPC call didn't return the instance list
2067 @type ghost: boolean
2068 @ivar ghost: whether this is a known node or not (config)
2069 @type os_fail: boolean
2070 @ivar os_fail: whether the RPC call didn't return valid OS data
2072 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2073 @type vm_capable: boolean
2074 @ivar vm_capable: whether the node can host instances
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
2095 def ExpandNames(self):
2096 # This raises errors.OpPrereqError on its own:
2097 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2099 # Get instances in node group; this is unsafe and needs verification later
    inst_names = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2103 self.needed_locks = {
2104 locking.LEVEL_INSTANCE: inst_names,
2105 locking.LEVEL_NODEGROUP: [self.group_uuid],
2106 locking.LEVEL_NODE: [],
2109 self.share_locks = _ShareAll()
2111 def DeclareLocks(self, level):
2112 if level == locking.LEVEL_NODE:
2113 # Get members of node group; this is unsafe and needs verification later
2114 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2116 all_inst_info = self.cfg.GetAllInstancesInfo()
2118 # In Exec(), we warn about mirrored instances that have primary and
2119 # secondary living in separate node groups. To fully verify that
2120 # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
2123 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2124 # Important: access only the instances whose lock is owned
2125 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2126 nodes.update(all_inst_info[inst].secondary_nodes)
2128 self.needed_locks[locking.LEVEL_NODE] = nodes
2130 def CheckPrereq(self):
2131 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2132 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2134 group_nodes = set(self.group_info.members)
    group_instances = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    unlocked_nodes = \
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
      group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes),
                                 errors.ECODE_STATE)

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances),
                                 errors.ECODE_STATE)
2154 self.all_node_info = self.cfg.GetAllNodesInfo()
2155 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2157 self.my_node_names = utils.NiceSort(group_nodes)
2158 self.my_inst_names = utils.NiceSort(group_instances)
2160 self.my_node_info = dict((name, self.all_node_info[name])
2161 for name in self.my_node_names)
2163 self.my_inst_info = dict((name, self.all_inst_info[name])
2164 for name in self.my_inst_names)
2166 # We detect here the nodes that will need the extra RPC calls for verifying
2167 # split LV volumes; they should be locked.
2168 extra_lv_nodes = set()
2170 for inst in self.my_inst_info.values():
2171 if inst.disk_template in constants.DTS_INT_MIRROR:
2172 for nname in inst.all_nodes:
2173 if self.all_node_info[nname].group != self.group_uuid:
2174 extra_lv_nodes.add(nname)
2176 unlocked_lv_nodes = \
2177 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2179 if unlocked_lv_nodes:
2180 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2181 utils.CommaJoin(unlocked_lv_nodes),
2183 self.extra_lv_nodes = list(extra_lv_nodes)
2185 def _VerifyNode(self, ninfo, nresult):
2186 """Perform some basic validation on data returned from a node.
      - check the result data structure is well formed and has all the
        mandatory fields
2190 - check ganeti version
2192 @type ninfo: L{objects.Node}
2193 @param ninfo: the node to check
2194 @param nresult: the results from the node
2196 @return: whether overall this call was successful (and we can expect
        reasonable values in the response)
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False
2210 # compares ganeti version
2211 local_version = constants.PROTOCOL_VERSION
2212 remote_version = nresult.get("version", None)
2213 test = not (remote_version and
2214 isinstance(remote_version, (list, tuple)) and
2215 len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False
2228 # node seems compatible, we can actually try to look into its results
2230 # full package version
2231 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2232 constants.CV_ENODEVERSION, node,
2233 "software version mismatch: master %s, node %s",
2234 constants.RELEASE_VERSION, remote_version[1],
2235 code=self.ETYPE_WARNING)
2237 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2238 if ninfo.vm_capable and isinstance(hyp_result, dict):
2239 for hv_name, hv_result in hyp_result.iteritems():
2240 test = hv_result is not None
2241 _ErrorIf(test, constants.CV_ENODEHV, node,
2242 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2244 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2245 if ninfo.vm_capable and isinstance(hvp_result, list):
2246 for item, hv_name, hv_result in hvp_result:
2247 _ErrorIf(True, constants.CV_ENODEHV, node,
2248 "hypervisor %s parameter verify failure (source %s): %s",
2249 hv_name, item, hv_result)
2251 test = nresult.get(constants.NV_NODESETUP,
2252 ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
2258 def _VerifyNodeTime(self, ninfo, nresult,
2259 nvinfo_starttime, nvinfo_endtime):
2260 """Check the node time.
2262 @type ninfo: L{objects.Node}
2263 @param ninfo: the node to check
2264 @param nresult: the remote results for the node
2265 @param nvinfo_starttime: the start time of the RPC call
2266 @param nvinfo_endtime: the end time of the RPC call
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node,
               "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
2290 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2291 """Check the node LVM results.
2293 @type ninfo: L{objects.Node}
2294 @param ninfo: the node to check
2295 @param nresult: the remote results for the node
2296 @param vg_name: the configured VG name
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = vglist is None
    _ErrorIf(test, constants.CV_ENODELVM, node,
             "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    # check PV names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)
2328 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2329 """Check the node bridges.
2331 @type ninfo: L{objects.Node}
2332 @param ninfo: the node to check
2333 @param nresult: the remote results for the node
2334 @param bridges: the expected list of bridges
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2351 def _VerifyNodeUserScripts(self, ninfo, nresult):
2352 """Check the results of user scripts presence and executability on the node
2354 @type ninfo: L{objects.Node}
2355 @param ninfo: the node to check
2356 @param nresult: the remote results for the node
    node = ninfo.name

    test = constants.NV_USERSCRIPTS not in nresult
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
                  "did not return user scripts information")

    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    if broken_scripts:
      self._ErrorIf(True, constants.CV_ENODEUSERSCRIPTS, node,
                    "user scripts not present or not executable: %s" %
                    utils.CommaJoin(sorted(broken_scripts)))
2371 def _VerifyNodeNetwork(self, ninfo, nresult):
2372 """Check the node network connectivity results.
2374 @type ninfo: L{objects.Node}
2375 @param ninfo: the node to check
2376 @param nresult: the remote results for the node
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, constants.CV_ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, constants.CV_ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
2415 """Verify an instance.
2417 This function checks to see if the required block devices are
2418 available on the instance's node.
2421 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2422 node_current = instanceconfig.primary_node
2424 node_vol_should = {}
2425 instanceconfig.MapLVsByNode(node_vol_should)
2427 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2428 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2429 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2431 for node in node_vol_should:
2432 n_img = node_image[node]
2433 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
2436 for volume in node_vol_should[node]:
2437 test = volume not in n_img.volumes
2438 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2439 "volume %s missing on node %s", volume, node)
2441 if instanceconfig.admin_state == constants.ADMINST_UP:
2442 pri_img = node_image[node_current]
2443 test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)
2448 diskdata = [(nname, success, status, idx)
2449 for (nname, disks) in diskstatus.items()
2450 for idx, (success, status) in enumerate(disks)]
2452 for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # known node object
2455 snode = node_image[nname]
2456 bad_snode = snode.ghost or snode.offline
2457 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2458 not success and not bad_snode,
2459 constants.CV_EINSTANCEFAULTYDISK, instance,
2460 "couldn't retrieve status for disk/%s on %s: %s",
2461 idx, nname, bdev_status)
2462 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2463 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2464 constants.CV_EINSTANCEFAULTYDISK, instance,
2465 "disk/%s on %s is faulty", idx, nname)
2467 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2468 """Verify if there are any unknown volumes in the cluster.
2470 The .os, .swap and backup volumes are ignored. All other volumes are
2471 reported as unknown.
2473 @type reserved: L{ganeti.utils.FieldSet}
2474 @param reserved: a FieldSet of reserved volume names
2477 for node, n_img in node_image.items():
2478 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2479 self.all_node_info[node].group != self.group_uuid):
        # skip non-healthy nodes
        continue
2482 for volume in n_img.volumes:
2483 test = ((node not in node_vol_should or
2484 volume not in node_vol_should[node]) and
2485 not reserved.Matches(volume))
2486 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2487 "volume %s is unknown", volume)
2489 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2490 """Verify N+1 Memory Resilience.
2492 Check that if one single node dies we can still start all the
2493 instances it was primary for.
2496 cluster_info = self.cfg.GetClusterInfo()
2497 for node, n_img in node_image.items():
2498 # This code checks that every node which is now listed as
2499 # secondary has enough memory to host all instances it is
2500 # supposed to should a single other node in the cluster fail.
2501 # FIXME: not ready for failover to an arbitrary node
2502 # FIXME: does not support file-backed instances
2503 # WARNING: we currently take into account down instances as well
2504 # as up ones, considering that even if they're down someone
2505 # might want to start them even in the event of a node failure.
2506 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2507 # we're skipping nodes marked offline and nodes in other groups from
2508 # the N+1 warning, since most likely we don't have good memory
          # information from them; we already list instances living on such
          # nodes, and that's enough warning
          continue
2512 #TODO(dynmem): also consider ballooning out other instances
2513 for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
2516 bep = cluster_info.FillBE(instance_cfg[instance])
2517 if bep[constants.BE_AUTO_BALANCE]:
2518 needed_mem += bep[constants.BE_MINMEM]
2519 test = n_img.mfree < needed_mem
2520 self._ErrorIf(test, constants.CV_ENODEN1, node,
2521 "not enough memory to accomodate instance failovers"
2522 " should node %s fail (%dMiB needed, %dMiB available)",
2523 prinode, needed_mem, n_img.mfree)
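  # Worked example (hypothetical values): if node2 has
  # sbp == {"node1": ["inst1", "inst2"]} and both instances are
  # auto-balanced with BE_MINMEM of 1024 and 2048 MiB, then
  # needed_mem == 3072 MiB; a reported mfree of 2048 MiB would raise
  # CV_ENODEN1 for node2, since it could not absorb node1's instances
  # should node1 fail.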
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2527 (files_all, files_opt, files_mc, files_vm)):
2528 """Verifies file checksums collected from all nodes.
2530 @param errorif: Callback for reporting errors
2531 @param nodeinfo: List of L{objects.Node} objects
2532 @param master_node: Name of master node
2533 @param all_nvinfo: RPC results
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
2551 nodefiles.update((filename,
2552 frozenset(map(operator.attrgetter("name"), filenodes)))
2553 for filename in files)
2555 assert set(nodefiles) == (files_all | files_mc | files_vm)
2557 fileinfo = dict((filename, {}) for filename in nodefiles)
2558 ignore_nodes = set()
2560 for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue
2565 nresult = all_nvinfo[node.name]
      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2572 test = not (node_files and isinstance(node_files, dict))
2573 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2574 "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue
2579 # Build per-checksum mapping from filename to nodes having it
2580 for (filename, checksum) in node_files.items():
2581 assert filename in nodefiles
2582 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2584 for (filename, checksums) in fileinfo.items():
2585 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2587 # Nodes having the file
2588 with_file = frozenset(node_name
2589 for nodes in fileinfo[filename].values()
2590 for node_name in nodes) - ignore_nodes
2592 expected_nodes = nodefiles[filename] - ignore_nodes
2594 # Nodes missing file
2595 missing_file = expected_nodes - with_file
2597 if filename in files_opt:
2599 errorif(missing_file and missing_file != expected_nodes,
2600 constants.CV_ECLUSTERFILECHECK, None,
2601 "File %s is optional, but it must exist on all or no"
2602 " nodes (not found on %s)",
2603 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2616 # See if there are multiple versions of the file
2617 test = len(checksums) > 1
2619 variants = ["variant %s on %s" %
2620 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2621 for (idx, (checksum, nodes)) in
2622 enumerate(sorted(checksums.items()))]
2626 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2627 "File %s found with %s different checksums (%s)",
2628 filename, len(checksums), "; ".join(variants))
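  # An illustrative sketch (hypothetical path and checksums) of the fileinfo
  # structure verified above, mapping filename -> checksum -> set of nodes:
  #
  #   fileinfo = {
  #     "/var/lib/ganeti/config.data": {
  #       "0123abcd...": set(["node1", "node2"]),
  #       "4567cdef...": set(["node3"]),
  #       },
  #     }
  #
  # Two distinct checksums for the same file are reported as
  # "File ... found with 2 different checksums (...)".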
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.
2634 @type ninfo: L{objects.Node}
2635 @param ninfo: the node to check
2636 @param nresult: the remote results for the node
2637 @param instanceinfo: the dict of instances
2638 @param drbd_helper: the configured DRBD usermode helper
2639 @param drbd_map: the DRBD map as returned by
2640 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)
2660 # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
2663 test = instance not in instanceinfo
2664 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2665 "ghost instance '%s' in temporary DRBD map", instance)
2666 # ghost instance should not be running, but otherwise we
2667 # don't give double warnings (both ghost instance and
2668 # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name,
                            instance.admin_state == constants.ADMINST_UP)
2676 # and now check them
2677 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2678 test = not isinstance(used_minors, (tuple, list))
2679 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2680 "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return
2685 for minor, (iname, must_exist) in node_drbd.items():
2686 test = minor not in used_minors and must_exist
2687 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2688 "drbd minor %d of instance %s is not active", minor, iname)
2689 for minor in used_minors:
2690 test = minor not in node_drbd
2691 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2692 "unallocated drbd minor %d is in use", minor)
2694 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2695 """Builds the node OS structures.
2697 @type ninfo: L{objects.Node}
2698 @param ninfo: the node to check
2699 @param nresult: the remote results for the node
2700 @param nimg: the node image object
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2706 remote_os = nresult.get(constants.NV_OSLIST, None)
2707 test = (not isinstance(remote_os, list) or
2708 not compat.all(isinstance(v, list) and len(v) == 7
2709 for v in remote_os))
    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}
2721 for (name, os_path, status, diagnose,
2722 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
      if name not in os_dict:
        os_dict[name] = []
2727 # parameters is a list of lists instead of list of tuples due to
2728 # JSON lacking a real tuple type, fix it:
2729 parameters = [tuple(v) for v in parameters]
2730 os_dict[name].append((os_path, status, diagnose,
2731 set(variants), set(parameters), set(api_ver)))
2733 nimg.oslist = os_dict
2735 def _VerifyNodeOS(self, ninfo, nimg, base):
2736 """Verifies the node OS list.
2738 @type ninfo: L{objects.Node}
2739 @param ninfo: the node to check
2740 @param nimg: the node image object
2741 @param base: the 'template' node we match against (e.g. from the master)
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2747 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2749 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2750 for os_name, os_data in nimg.oslist.items():
2751 assert os_data, "Empty OS status for OS %s?!" % os_name
2752 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2753 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2754 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2755 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2756 "OS '%s' has multiple entries (first one shadows the rest): %s",
2757 os_name, utils.CommaJoin([v[0] for v in os_data]))
2758 # comparisons with the 'base' image
2759 test = os_name not in base.oslist
2760 _ErrorIf(test, constants.CV_ENODEOS, node,
2761 "Extra OS %s not present on reference node (%s)",
2765 assert base.oslist[os_name], "Base node has empty OS status?"
2766 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
2770 for kind, a, b in [("API version", f_api, b_api),
2771 ("variants list", f_var, b_var),
2772 ("parameters", beautify_params(f_param),
2773 beautify_params(b_param))]:
2774 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2775 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2776 kind, os_name, base.name,
2777 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2779 # check any missing OSes
2780 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2781 _ErrorIf(missing, constants.CV_ENODEOS, node,
2782 "OSes present on reference node %s but missing on this node: %s",
2783 base.name, utils.CommaJoin(missing))
2785 def _VerifyOob(self, ninfo, nresult):
2786 """Verifies out of band functionality of a node.
2788 @type ninfo: L{objects.Node}
2789 @param ninfo: the node to check
2790 @param nresult: the remote results for the node
    node = ninfo.name

    # We just have to verify the paths on master and/or master candidates
2795 # as the oob helper is invoked on the master
2796 if ((ninfo.master_candidate or ninfo.master_capable) and
2797 constants.NV_OOB_PATHS in nresult):
2798 for path_result in nresult[constants.NV_OOB_PATHS]:
2799 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2801 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2802 """Verifies and updates the node volume data.
2804 This function will update a L{NodeImage}'s internal structures
2805 with data from the remote call.
2807 @type ninfo: L{objects.Node}
2808 @param ninfo: the node to check
2809 @param nresult: the remote results for the node
2810 @param nimg: the node image object
2811 @param vg_name: the configured VG name
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
2831 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2832 """Verifies and updates the node instance list.
2834 If the listing was successful, then updates this node's instance
2835 list. Otherwise, it marks the RPC call as failed for the instance
2838 @type ninfo: L{objects.Node}
2839 @param ninfo: the node to check
2840 @param nresult: the remote results for the node
2841 @param nimg: the node image object
2844 idata = nresult.get(constants.NV_INSTANCELIST, None)
2845 test = not isinstance(idata, list)
2846 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2847 "rpc call to node failed (instancelist): %s",
2848 utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata
2854 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2855 """Verifies and computes a node information map
2857 @type ninfo: L{objects.Node}
2858 @param ninfo: the node to check
2859 @param nresult: the remote results for the node
2860 @param nimg: the node image object
2861 @param vg_name: the configured VG name
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2867 # try to read free memory (from the hypervisor)
2868 hv_info = nresult.get(constants.NV_HVINFO, None)
2869 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2870 _ErrorIf(test, constants.CV_ENODEHV, node,
2871 "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")
2879 # FIXME: devise a free space model for file based instances as well
2880 if vg_name is not None:
2881 test = (constants.NV_VGLIST not in nresult or
2882 vg_name not in nresult[constants.NV_VGLIST])
2883 _ErrorIf(test, constants.CV_ENODELVM, node,
2884 "node didn't return data for the volume group '%s'"
2885 " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")
2893 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2894 """Gets per-disk status information for all instances.
2896 @type nodelist: list of strings
2897 @param nodelist: Node names
2898 @type node_image: dict of (name, L{objects.Node})
2899 @param node_image: Node objects
2900 @type instanceinfo: dict of (name, L{objects.Instance})
2901 @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
2903 @return: a dictionary of per-instance dictionaries with nodes as
2904 keys and disk information as values; the disk information is a
2905 list of tuples (success, payload)
2908 _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_disks = {}
    node_disks_devonly = {}
2912 diskless_instances = set()
2913 diskless = constants.DT_DISKLESS
2915 for nname in nodelist:
2916 node_instances = list(itertools.chain(node_image[nname].pinst,
2917 node_image[nname].sinst))
2918 diskless_instances.update(inst for inst in node_instances
2919 if instanceinfo[inst].disk_template == diskless)
2920 disks = [(inst, disk)
2921 for inst in node_instances
2922 for disk in instanceinfo[inst].disks]
      if not disks:
        # No need to collect data
        continue
2928 node_disks[nname] = disks
      # _AnnotateDiskParams already makes copies of the disks
      devonly = []
      for (inst, dev) in disks:
2933 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2934 self.cfg.SetDiskID(anno_disk, nname)
2935 devonly.append(anno_disk)
2937 node_disks_devonly[nname] = devonly
2939 assert len(node_disks) == len(node_disks_devonly)
2941 # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)
2945 assert len(result) == len(node_disks)
    instdisk = {}

    for (nname, nres) in result.items():
2950 disks = node_disks[nname]
      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))
2972 for ((inst, _), status) in zip(disks, data):
2973 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2975 # Add empty entries for diskless instances.
2976 for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}
2980 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2981 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2982 compat.all(isinstance(s, (tuple, list)) and
2983 len(s) == 2 for s in statuses)
2984 for inst, nnames in instdisk.items()
2985 for nname, statuses in nnames.items())
2986 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
2992 """Create endless iterators for all potential SSH check hosts.
2995 nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
2998 keyfunc = operator.attrgetter("group")
3000 return map(itertools.cycle,
3001 [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])
  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3007 """Choose which nodes should talk to which other nodes.
    We will make nodes contact all nodes in their group, and one node from
    every other group.
3012 @warning: This algorithm has a known issue if one node group is much
3013 smaller than others (e.g. just one node). In such a case all other
3014 nodes will talk to the single node.
3017 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3018 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3020 return (online_nodes,
3021 dict((name, sorted([i.next() for i in sel]))
3022 for name in online_nodes))
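  # An illustrative sketch (hypothetical names): verifying a group that
  # contains node1 and node2 while another group contains node3 and node4,
  # _SelectSshCheckNodes would return something like
  #
  #   (["node1", "node2"],
  #    {"node1": ["node3"], "node2": ["node4"]})
  #
  # i.e. every online node of the verified group also contacts one node of
  # each other group, cycling through that group's sorted members.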
3024 def BuildHooksEnv(self):
    Cluster-Verify hooks run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
      }
3035 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env
3040 def BuildHooksNodes(self):
3041 """Build hooks nodes.
3044 return ([], self.my_node_names)
3046 def Exec(self, feedback_fn):
3047 """Verify integrity of the node group, performing various test on nodes.
3050 # This method has too many local variables. pylint: disable=R0914
3051 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3053 if not self.my_node_names:
3055 feedback_fn("* Empty node group, skipping verification")
3059 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3060 verbose = self.op.verbose
3061 self._feedback_fn = feedback_fn
3063 vg_name = self.cfg.GetVGName()
3064 drbd_helper = self.cfg.GetDRBDHelper()
3065 cluster = self.cfg.GetClusterInfo()
3066 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3067 hypervisors = cluster.enabled_hypervisors
3068 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3070 i_non_redundant = [] # Non redundant instances
3071 i_non_a_balanced = [] # Non auto-balanced instances
3072 i_offline = 0 # Count of offline instances
3073 n_offline = 0 # Count of offline nodes
3074 n_drained = 0 # Count of nodes being drained
3075 node_vol_should = {}
3077 # FIXME: verify OS list
3080 filemap = _ComputeAncillaryFiles(cluster, False)
3082 # do local checksums
3083 master_node = self.master_node = self.cfg.GetMasterNode()
3084 master_ip = self.cfg.GetMasterIP()
3086 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
3090 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3092 node_verify_param = {
3093 constants.NV_FILELIST:
3094 utils.UniqueSequence(filename
3095 for files in filemap
3096 for filename in files),
3097 constants.NV_NODELIST:
3098 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3099 self.all_node_info.values()),
3100 constants.NV_HYPERVISOR: hypervisors,
3101 constants.NV_HVPARAMS:
3102 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3103 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3104 for node in node_data_list
3105 if not node.offline],
3106 constants.NV_INSTANCELIST: hypervisors,
3107 constants.NV_VERSION: None,
3108 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3109 constants.NV_NODESETUP: None,
3110 constants.NV_TIME: None,
3111 constants.NV_MASTERIP: (master_node, master_ip),
3112 constants.NV_OSLIST: None,
3113 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      constants.NV_USERSCRIPTS: user_scripts,
      }
3117 if vg_name is not None:
3118 node_verify_param[constants.NV_VGLIST] = None
3119 node_verify_param[constants.NV_LVLIST] = vg_name
3120 node_verify_param[constants.NV_PVLIST] = [vg_name]
3121 node_verify_param[constants.NV_DRBDLIST] = None
    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3127 # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3130 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3131 bridges.add(default_nicpp[constants.NIC_LINK])
3132 for instance in self.my_inst_info.values():
3133 for nic in instance.nics:
3134 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3135 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3136 bridges.add(full_nic[constants.NIC_LINK])
    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
3141 # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
3145 for node in node_data_list)
    oob_paths = []
    for node in self.all_node_info.values():
3150 path = _SupportsOob(self.cfg, node)
3151 if path and path not in oob_paths:
3152 oob_paths.append(path)
    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3157 for instance in self.my_inst_names:
3158 inst_config = self.my_inst_info[instance]
      if inst_config.admin_state == constants.ADMINST_OFFLINE:
        i_offline += 1
3162 for nname in inst_config.all_nodes:
3163 if nname not in node_image:
3164 gnode = self.NodeImage(name=nname)
3165 gnode.ghost = (nname not in self.all_node_info)
3166 node_image[nname] = gnode
3168 inst_config.MapLVsByNode(node_vol_should)
3170 pnode = inst_config.primary_node
3171 node_image[pnode].pinst.append(instance)
3173 for snode in inst_config.secondary_nodes:
3174 nimg = node_image[snode]
3175 nimg.sinst.append(instance)
3176 if pnode not in nimg.sbp:
3177 nimg.sbp[pnode] = []
3178 nimg.sbp[pnode].append(instance)
3180 # At this point, we have the in-memory data structures complete,
3181 # except for the runtime information, which we'll gather next
3183 # Due to the way our RPC system works, exact response times cannot be
3184 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
3187 nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
3191 nvinfo_endtime = time.time()
3193 if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}
3201 all_drbd_map = self.cfg.ComputeDRBDMap()
3203 feedback_fn("* Gathering disk information (%s nodes)" %
3204 len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)
3208 feedback_fn("* Verifying configuration file consistency")
3210 # If not all nodes are being checked, we need to make sure the master node
3211 # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break

      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()
3235 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3237 feedback_fn("* Verifying node status")
3241 for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue
      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3263 msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload
3272 nimg.call_ok = self._VerifyNode(node_i, nresult)
3273 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3274 self._VerifyNodeNetwork(node_i, nresult)
3275 self._VerifyNodeUserScripts(node_i, nresult)
3276 self._VerifyOob(node_i, nresult)
      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)
      # Check whether all running instances are primary for the node. (This
3295 # can no longer be done from _VerifyInstance below, since some of the
3296 # wrong instances could be from other node groups.)
3297 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3299 for inst in non_primary_inst:
3300 test = inst in self.all_inst_info
3301 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3302 "instance should not run on node %s", node_i.name)
3303 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3304 "node is running unknown instance %s", inst)
3306 for node, result in extra_lv_nvinfo.items():
3307 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3308 node_image[node], vg_name)
3310 feedback_fn("* Verifying instance status")
3311 for instance in self.my_inst_names:
3313 feedback_fn("* Verifying instance %s" % instance)
3314 inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
3317 inst_nodes_offline = []
3319 pnode = inst_config.primary_node
3320 pnode_img = node_image[pnode]
3321 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3322 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3323 " primary node failed", instance)
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
               pnode_img.offline,
               constants.CV_EINSTANCEBADNODE, instance,
3328 "instance is marked as running and lives on offline node %s",
3329 inst_config.primary_node)
3331 # If the instance is non-redundant we cannot survive losing its primary
3332 # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
3336 if not inst_config.secondary_nodes:
3337 i_non_redundant.append(instance)
3339 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3340 constants.CV_EINSTANCELAYOUT,
3341 instance, "instance has multiple secondary nodes: %s",
3342 utils.CommaJoin(inst_config.secondary_nodes),
3343 code=self.ETYPE_WARNING)
3345 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3346 pnode = inst_config.primary_node
3347 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3348 instance_groups = {}
3350 for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]
3361 self._ErrorIf(len(instance_groups) > 1,
3362 constants.CV_EINSTANCESPLITGROUPS,
3363 instance, "instance has primary and secondary nodes in"
3364 " different groups: %s", utils.CommaJoin(pretty_list),
3365 code=self.ETYPE_WARNING)
3367 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3368 i_non_a_balanced.append(instance)
3370 for snode in inst_config.secondary_nodes:
3371 s_img = node_image[snode]
3372 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3373 snode, "instance %s, connection to secondary node failed",
3377 inst_nodes_offline.append(snode)
3379 # warn that the instance lives on offline nodes
3380 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3381 "instance has offline secondary node(s) %s",
3382 utils.CommaJoin(inst_nodes_offline))
3383 # ... or ghost/non-vm_capable nodes
3384 for node in inst_config.all_nodes:
3385 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3386 instance, "instance lives on ghost node %s", node)
3387 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3388 instance, "instance lives on non-vm_capable node %s", node)
3390 feedback_fn("* Verifying orphan volumes")
3391 reserved = utils.FieldSet(*cluster.reserved_lvs)
3393 # We will get spurious "unknown volume" warnings if any node of this group
3394 # is secondary for an instance whose primary is in another group. To avoid
3395 # them, we find these instances and add their volumes to node_vol_should.
3396 for inst in self.all_inst_info.values():
3397 for secondary in inst.secondary_nodes:
3398 if (secondary in self.my_node_info
3399 and inst.name not in self.my_inst_info):
3400 inst.MapLVsByNode(node_vol_should)
3403 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3405 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3406 feedback_fn("* Verifying N+1 Memory redundancy")
3407 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3409 feedback_fn("* Other Notes")
3411 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3412 % len(i_non_redundant))
3414 if i_non_a_balanced:
3415 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3416 % len(i_non_a_balanced))
3419 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3422 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3425 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3429 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3430 """Analyze the post-hooks' result
3432 This method analyses the hook result, handles it, and sends some
3433 nicely-formatted feedback back to the user.
3435 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3436 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3437 @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
3439 @param lu_result: previous Exec result
3440 @return: the new Exec result, based on the previous result
3444 # We only really run POST phase hooks, only for non-empty groups,
3445 # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
3449 elif phase == constants.HOOKS_PHASE_POST:
3450 # Used to change hooks' output to proper indentation
3451 feedback_fn("* Hooks Results")
3452 assert hooks_results, "invalid result from hooks"
3454 for node_name in hooks_results:
3455 res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
3458 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3459 "Communication failure in hooks execution: %s", msg)
3460 if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
3464 for script, hkr, output in res.payload:
3465 test = hkr == constants.HKR_FAIL
3466 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3467 "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result
3476 class LUClusterVerifyDisks(NoHooksLU):
3477 """Verifies the cluster disks status.
3482 def ExpandNames(self):
3483 self.share_locks = _ShareAll()
3484 self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }
3488 def Exec(self, feedback_fn):
3489 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3491 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3492 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3493 for group in group_names])
3496 class LUGroupVerifyDisks(NoHooksLU):
3497 """Verifies the status of all disks in a node group.
3502 def ExpandNames(self):
3503 # Raises errors.OpPrereqError on its own if group can't be found
3504 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3506 self.share_locks = _ShareAll()
3507 self.needed_locks = {
3508 locking.LEVEL_INSTANCE: [],
3509 locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }
3513 def DeclareLocks(self, level):
3514 if level == locking.LEVEL_INSTANCE:
3515 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3517 # Lock instances optimistically, needs verification once node and group
3518 # locks have been acquired
3519 self.needed_locks[locking.LEVEL_INSTANCE] = \
3520 self.cfg.GetNodeGroupInstances(self.group_uuid)
3522 elif level == locking.LEVEL_NODEGROUP:
3523 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3525 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3526 set([self.group_uuid] +
3527 # Lock all groups used by instances optimistically; this requires
3528 # going via the node before it's locked, requiring verification
3531 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3532 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3534 elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
3537 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3538 self._LockInstancesNodes()
3540 # Lock all nodes in group to be verified
3541 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3542 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3543 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3545 def CheckPrereq(self):
3546 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3547 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3548 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3550 assert self.group_uuid in owned_groups
3552 # Check if locked instances are still correct
3553 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3555 # Get instance information
3556 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3558 # Check if node groups for locked instances are still correct
3559 _CheckInstancesNodeGroups(self.cfg, self.instances,
3560 owned_groups, owned_nodes, self.group_uuid)
3562 def Exec(self, feedback_fn):
3563 """Verify integrity of cluster disks.
3565 @rtype: tuple of three items
3566 @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes
    res_nodes = {}
    res_instances = set()
    res_missing = {}
3575 nv_dict = _MapInstanceDisksToNodes([inst
3576 for inst in self.instances.values()
3577 if inst.admin_state == constants.ADMINST_UP])
3580 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3581 set(self.cfg.GetVmCapableNodeList()))
3583 node_lvs = self.rpc.call_lv_list(nodes, [])
3585 for (node, node_res) in node_lvs.items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue
3595 for lv_name, (_, _, lv_online) in node_res.payload.items():
3596 inst = nv_dict.pop((node, lv_name), None)
3597 if not (lv_online or inst is None):
3598 res_instances.add(inst)
3600 # any leftover items in nv_dict are missing LVs, let's arrange the data
3602 for key, inst in nv_dict.iteritems():
3603 res_missing.setdefault(inst, []).append(list(key))
3605 return (res_nodes, list(res_instances), res_missing)
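# An illustrative sketch (hypothetical names) of the three-element result
# returned by LUGroupVerifyDisks.Exec:
#
#   ({"node1": "Error while running lvs ..."},   # node name -> error message
#    ["inst2"],                                  # instances needing
#                                                # activate-disks
#    {"inst3": [["node2", "xenvg/disk0"]]})      # instance -> missing
#                                                # (node, LV) pairs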
3608 class LUClusterRepairDiskSizes(NoHooksLU):
3609 """Verifies the cluster disks sizes.
3614 def ExpandNames(self):
3615 if self.op.instances:
3616 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3617 self.needed_locks = {
3618 locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
3621 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
3628 self.share_locks = {
3629 locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }
3633 def DeclareLocks(self, level):
3634 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3635 self._LockInstancesNodes(primary_only=True, level=level)
3637 def CheckPrereq(self):
3638 """Check prerequisites.
3640 This only checks the optional instance list against the existing names.
3643 if self.wanted_names is None:
3644 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3646 self.wanted_instances = \
3647 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3649 def _EnsureChildSizes(self, disk):
3650 """Ensure children of the disk have the needed disk size.
3652 This is valid mainly for DRBD8 and fixes an issue where the
3653 children have smaller disk size.
3655 @param disk: an L{ganeti.objects.Disk} object
3658 if disk.dev_type == constants.LD_DRBD8:
3659 assert disk.children, "Empty children for DRBD8?"
3660 fchild = disk.children[0]
3661 mismatch = fchild.size < disk.size
3663 self.LogInfo("Child disk has size %d, parent %d, fixing",
3664 fchild.size, disk.size)
3665 fchild.size = disk.size
3667 # and we recurse on this child only, not on the metadev
3668 return self._EnsureChildSizes(fchild) or mismatch
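  # Worked example (hypothetical sizes): for a DRBD8 disk of 10240 MiB whose
  # data child reports 10238 MiB, the child is grown to 10240 MiB and the
  # method returns True, telling the caller to write back the updated
  # configuration.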
3672 def Exec(self, feedback_fn):
3673 """Verify the size of cluster disks.
3676 # TODO: check child disks too
3677 # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
3680 pnode = instance.primary_node
3681 if pnode not in per_node_disks:
3682 per_node_disks[pnode] = []
3683 for idx, disk in enumerate(instance.disks):
3684 per_node_disks[pnode].append((instance, idx, disk))
3686 assert not (frozenset(per_node_disks.keys()) -
3687 self.owned_locks(locking.LEVEL_NODE_RES)), \
3688 "Not owning correct locks"
3689 assert not self.owned_locks(locking.LEVEL_NODE)
    changed = []
    for node, dskl in per_node_disks.items():
3693 newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
3696 result = self.rpc.call_blockdev_getsize(node, newl)
3698 self.LogWarning("Failure in blockdev_getsize call to node"
3699 " %s, ignoring", node)
3701 if len(result.payload) != len(dskl):
3702 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3703 " result.payload=%s", node, len(dskl), result.payload)
3704 self.LogWarning("Invalid result from node %s, ignoring node results",
3707 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3709 self.LogWarning("Disk %d of instance %s did not return size"
3710 " information, ignoring", idx, instance.name)
3712 if not isinstance(size, (int, long)):
3713 self.LogWarning("Disk %d of instance %s did not return valid"
3714 " size information, ignoring", idx, instance.name)
3717 if size != disk.size:
3718 self.LogInfo("Disk %d of instance %s has mismatched size,"
3719 " correcting: recorded %d, actual %d", idx,
3720 instance.name, disk.size, size)
3722 self.cfg.Update(instance, feedback_fn)
3723 changed.append((instance.name, idx, size))
3724 if self._EnsureChildSizes(disk):
3725 self.cfg.Update(instance, feedback_fn)
3726 changed.append((instance.name, idx, disk.size))
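
  # Note (illustrative, not in the original code): the list returned above
  # holds one (instance_name, disk_index, new_size) tuple per corrected
  # disk, e.g. [("inst1.example.com", 0, 10240)], with sizes in MiB.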


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask), errors.ECODE_INVAL)
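
# Usage sketch (hypothetical values, not part of the original code): the
# netmask is a CIDR prefix length, so with an IPv4 primary family
# _ValidateNetmask(cfg, 24) passes while _ValidateNetmask(cfg, 33) raises
# errors.OpPrereqError.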


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

    if self.op.diskparams:
      for dt_params in self.op.diskparams.values():
        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
      try:
        utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
      except errors.OpPrereqError, err:
        raise errors.OpPrereqError("While verifying diskparams options: %s" %
                                   err, errors.ECODE_INVAL)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }
    self.share_locks = {
      locking.LEVEL_NODE: 1,
      locking.LEVEL_INSTANCE: 1,
      locking.LEVEL_NODEGROUP: 1,
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.hv_state:
      new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                            self.cluster.hv_state_static)
      self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
                               for hv, values in new_hv_state.items())

    if self.op.disk_state:
      new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
                                                self.cluster.disk_state_static)
      self.new_disk_state = \
        dict((storage, dict((name, cluster.SimpleFillDiskState(values))
                            for name, values in svalues.items()))
             for storage, svalues in new_disk_state.items())

    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
                                            group_policy=False)

      all_instances = self.cfg.GetAllInstancesInfo().values()
      violations = set()
      for group in self.cfg.GetAllNodeGroupsInfo().values():
        instances = frozenset([inst for inst in all_instances
                               if compat.any(node in group.members
                                             for node in inst.all_nodes)])
        new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
        new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
                                                                   group),
                                            new_ipolicy, instances)
        violations.update(new)

      if violations:
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(utils.NiceSort(violations)))

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # disk template parameters
    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
    if self.op.diskparams:
      for dt_name, dt_params in self.op.diskparams.items():
        if dt_name not in self.new_diskparams:
          self.new_diskparams[dt_name] = dt_params
        else:
          self.new_diskparams[dt_name].update(dt_params)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams
    if self.op.diskparams:
      self.cluster.diskparams = self.new_diskparams
    if self.op.hv_state:
      self.cluster.hv_state_static = self.new_hv_state
    if self.op.disk_state:
      self.cluster.disk_state_static = self.new_disk_state

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
        feedback_fn("Note: CONFD was disabled at build time, node health"
                    " maintenance is not useful (still enabling it)")
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

      self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  if cluster.use_external_mip_script:
    files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
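
# Informal summary of the invariants asserted above (illustrative only):
#   - files_all, files_mc and files_vm are pairwise disjoint
#   - files_opt is a subset of their union
# e.g. RAPI_USERS_FILE is shipped to all nodes but may legitimately be
# absent, so it appears in both files_all and files_opt.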


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  online_set = frozenset(online_nodes)
  vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
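
# Hedged usage note (not in the original code): LUClusterRedistConf below
# calls _RedistributeAncillaryFiles(lu) with the defaults, while node
# addition can pass nodes not yet in the configuration via additional_nodes.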


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10  # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    retries = 0
    rstats = rstats.payload
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
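
# Note (added for clarity): the function returns True only if no disk is
# left degraded after the final poll; a False result may still describe a
# transient state on mirrors that are catching up.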


def _BlockdevFind(lu, node, dev, instance):
  """Wrapper around call_blockdev_find to annotate diskparams.

  @param lu: A reference to the lu object
  @param node: The node to call out
  @param dev: The device to find
  @param instance: The instance object the device belongs to
  @returns The result of the rpc call

  """
  (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return lu.rpc.call_blockdev_find(node, disk)


def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
  """Wrapper around L{_CheckDiskConsistencyInner}.

  """
  (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
                                    ldisk=ldisk)


def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
                               ldisk=False):
  """Check that mirrors are not degraded.

  @attention: The device has to be annotated already.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
                                                     on_primary)

  return result
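
# Illustrative call (hypothetical arguments, not in the original code):
# checking only the local storage status of a DRBD mirror on its secondary
# node would look like:
#   _CheckDiskConsistency(lu, instance, dev, node, on_primary=False,
#                         ldisk=True)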


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does"
                               " not match actual power state (%s)"),
                              node.powered, node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s"
                    % type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s"
                               % utils.CommaJoin(errs))
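
# Examples of payloads accepted by _CheckPayload above (illustrative,
# assuming the usual constants):
#   OOB_HEALTH       -> a list of (item, status) pairs, with statuses taken
#                       from constants.OOB_STATUSES
#   OOB_POWER_STATUS -> a dict such as
#                       {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE -> no payload at all (None)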


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
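
  # Hypothetical example (not in the original code):
  # _BuildFilter(["name", "valid"], ["lenny-image"]) yields roughly
  #   [OP_AND, <name filter for "lenny-image">,
  #            [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
  #                     [OP_NOT, [OP_TRUE, "blacklisted"]]]]
  # since "valid" is requested but "hidden"/"blacklisted" are not.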


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      pass
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
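
# Illustrative output (hypothetical values, not in the original code): for
# output_fields=["node", "name", "size"] each row is a list of strings,
# e.g. ["node1.example.com", "disk0", "10240"], since every value is passed
# through str() above.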


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on a node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)
5539 def CheckPrereq(self):
5540 """Check prerequisites.
5543 - the new node is not already in the config
5545 - its parameters (single/dual homed) matches the cluster
5547 Any errors are signaled by raising errors.OpPrereqError.
5551 hostname = self.hostname
5552 node = hostname.name
5553 primary_ip = self.op.primary_ip = hostname.ip
5554 if self.op.secondary_ip is None:
5555 if self.primary_ip_family == netutils.IP6Address.family:
5556 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5557 " IPv4 address must be given as secondary",
5559 self.op.secondary_ip = primary_ip
5561 secondary_ip = self.op.secondary_ip
5562 if not netutils.IP4Address.IsValid(secondary_ip):
5563 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5564 " address" % secondary_ip, errors.ECODE_INVAL)
5566 node_list = cfg.GetNodeList()
5567 if not self.op.readd and node in node_list:
5568 raise errors.OpPrereqError("Node %s is already in the configuration" %
5569 node, errors.ECODE_EXISTS)
5570 elif self.op.readd and node not in node_list:
5571 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5574 self.changed_primary_ip = False
5576 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5577 if self.op.readd and node == existing_node_name:
5578 if existing_node.secondary_ip != secondary_ip:
5579 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5580 " address configuration as before",
5582 if existing_node.primary_ip != primary_ip:
5583 self.changed_primary_ip = True
5587 if (existing_node.primary_ip == primary_ip or
5588 existing_node.secondary_ip == primary_ip or
5589 existing_node.primary_ip == secondary_ip or
5590 existing_node.secondary_ip == secondary_ip):
5591 raise errors.OpPrereqError("New node ip address(es) conflict with"
5592 " existing node %s" % existing_node.name,
5593 errors.ECODE_NOTUNIQUE)
5595 # After this 'if' block, None is no longer a valid value for the
5596 # _capable op attributes
5598 old_node = self.cfg.GetNodeInfo(node)
5599 assert old_node is not None, "Can't retrieve locked node %s" % node
5600 for attr in self._NFLAGS:
5601 if getattr(self.op, attr) is None:
5602 setattr(self.op, attr, getattr(old_node, attr))
5604 for attr in self._NFLAGS:
5605 if getattr(self.op, attr) is None:
5606 setattr(self.op, attr, True)
5608 if self.op.readd and not self.op.vm_capable:
5609 pri, sec = cfg.GetNodeInstances(node)
5611 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5612 " flag set to false, but it already holds"
5613 " instances" % node,
5616 # check that the type of the node (single versus dual homed) is the
5617 # same as for the master
5618 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5619 master_singlehomed = myself.secondary_ip == myself.primary_ip
5620 newbie_singlehomed = secondary_ip == primary_ip
5621 if master_singlehomed != newbie_singlehomed:
5622 if master_singlehomed:
5623 raise errors.OpPrereqError("The master has no secondary ip but the"
5624 " new node has one",
5627 raise errors.OpPrereqError("The master has a secondary ip but the"
5628 " new node doesn't have one",
5631 # checks reachability
5632 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5633 raise errors.OpPrereqError("Node not reachable by ping",
5634 errors.ECODE_ENVIRON)
5636 if not newbie_singlehomed:
5637 # check reachability from my secondary ip to newbie's secondary ip
5638 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5639 source=myself.secondary_ip):
5640 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5641 " based ping to node daemon port",
5642 errors.ECODE_ENVIRON)
5649 if self.op.master_capable:
5650 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5652 self.master_candidate = False
5655 self.new_node = old_node
5657 node_group = cfg.LookupNodeGroup(self.op.group)
5658 self.new_node = objects.Node(name=node,
5659 primary_ip=primary_ip,
5660 secondary_ip=secondary_ip,
5661 master_candidate=self.master_candidate,
5662 offline=False, drained=False,
5665 if self.op.ndparams:
5666 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5668 if self.op.hv_state:
5669 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5671 if self.op.disk_state:
5672 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5674 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5675 # it a property on the base class.
5676 result = rpc.DnsOnlyRunner().call_version([node])[node]
5677 result.Raise("Can't get version information from node %s" % node)
5678 if constants.PROTOCOL_VERSION == result.payload:
5679 logging.info("Communication to node %s fine, sw version %s match",
5680 node, result.payload)
5681 else:
5682 raise errors.OpPrereqError("Version mismatch master version %s,"
5683 " node version %s" %
5684 (constants.PROTOCOL_VERSION, result.payload),
5685 errors.ECODE_ENVIRON)
5687 def Exec(self, feedback_fn):
5688 """Adds the new node to the cluster.
5691 new_node = self.new_node
5692 node = new_node.name
5694 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5695 "Not owning BGL"
5697 # We are adding a new node, so we assume it is powered
5698 new_node.powered = True
5700 # for re-adds, reset the offline/drained/master-candidate flags;
5701 # we need to reset here, otherwise offline would prevent RPC calls
5702 # later in the procedure; this also means that if the re-add
5703 # fails, we are left with a non-offlined, broken node
5704 if self.op.readd:
5705 new_node.drained = new_node.offline = False # pylint: disable=W0201
5706 self.LogInfo("Readding a node, the offline/drained flags were reset")
5707 # if we demote the node, we do cleanup later in the procedure
5708 new_node.master_candidate = self.master_candidate
5709 if self.changed_primary_ip:
5710 new_node.primary_ip = self.op.primary_ip
5712 # copy the master/vm_capable flags
5713 for attr in self._NFLAGS:
5714 setattr(new_node, attr, getattr(self.op, attr))
5716 # notify the user about any possible mc promotion
5717 if new_node.master_candidate:
5718 self.LogInfo("Node will be a master candidate")
5720 if self.op.ndparams:
5721 new_node.ndparams = self.op.ndparams
5723 new_node.ndparams = {}
5725 if self.op.hv_state:
5726 new_node.hv_state_static = self.new_hv_state
5728 if self.op.disk_state:
5729 new_node.disk_state_static = self.new_disk_state
5731 # Add node to our /etc/hosts, and add key to known_hosts
5732 if self.cfg.GetClusterInfo().modify_etc_hosts:
5733 master_node = self.cfg.GetMasterNode()
5734 result = self.rpc.call_etc_hosts_modify(master_node,
5735 constants.ETC_HOSTS_ADD,
5736 new_node.name, new_node.primary_ip)
5738 result.Raise("Can't update hosts file with new host data")
5740 if new_node.secondary_ip != new_node.primary_ip:
5741 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5742 False)
5744 node_verify_list = [self.cfg.GetMasterNode()]
5745 node_verify_param = {
5746 constants.NV_NODELIST: ([node], {}),
5747 # TODO: do a node-net-test as well?
5748 }
5750 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5751 self.cfg.GetClusterName())
5752 for verifier in node_verify_list:
5753 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5754 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5755 if nl_payload:
5756 for failed in nl_payload:
5757 feedback_fn("ssh/hostname verification failed"
5758 " (checking from %s): %s" %
5759 (verifier, nl_payload[failed]))
5760 raise errors.OpExecError("ssh/hostname verification failed")
5762 if self.op.readd:
5763 _RedistributeAncillaryFiles(self)
5764 self.context.ReaddNode(new_node)
5765 # make sure we redistribute the config
5766 self.cfg.Update(new_node, feedback_fn)
5767 # and make sure the new node will not have old files around
5768 if not new_node.master_candidate:
5769 result = self.rpc.call_node_demote_from_mc(new_node.name)
5770 msg = result.fail_msg
5771 if msg:
5772 self.LogWarning("Node failed to demote itself from master"
5773 " candidate status: %s" % msg)
5774 else:
5775 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5776 additional_vm=self.op.vm_capable)
5777 self.context.AddNode(new_node, self.proc.GetECId())
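# Illustrative sketch (not part of the original module): the protocol-version
# handshake used by LUNodeAdd.CheckPrereq above, extracted as a hypothetical
# standalone helper. It only reuses calls already made above (DnsOnlyRunner,
# call_version, PROTOCOL_VERSION); the helper name itself is made up.
def _ExampleCheckNodeVersion(node):
  """Verify that a node daemon speaks the master's protocol version."""
  result = rpc.DnsOnlyRunner().call_version([node])[node]
  result.Raise("Can't get version information from node %s" % node)
  if constants.PROTOCOL_VERSION != result.payload:
    raise errors.OpPrereqError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload),
                               errors.ECODE_ENVIRON)
  return result.payload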
5780 class LUNodeSetParams(LogicalUnit):
5781 """Modifies the parameters of a node.
5783 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5784 to the node role (as _ROLE_*)
5785 @cvar _R2F: a dictionary from node role to tuples of flags
5786 @cvar _FLAGS: a list of attribute names corresponding to the flags
5789 HPATH = "node-modify"
5790 HTYPE = constants.HTYPE_NODE
5792 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5793 _F2R = {
5794 (True, False, False): _ROLE_CANDIDATE,
5795 (False, True, False): _ROLE_DRAINED,
5796 (False, False, True): _ROLE_OFFLINE,
5797 (False, False, False): _ROLE_REGULAR,
5798 }
5799 _R2F = dict((v, k) for k, v in _F2R.items())
5800 _FLAGS = ["master_candidate", "drained", "offline"]
5802 def CheckArguments(self):
5803 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5804 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5805 self.op.master_capable, self.op.vm_capable,
5806 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5807 self.op.disk_state]
5808 if all_mods.count(None) == len(all_mods):
5809 raise errors.OpPrereqError("Please pass at least one modification",
5810 errors.ECODE_INVAL)
5811 if all_mods.count(True) > 1:
5812 raise errors.OpPrereqError("Can't set the node into more than one"
5813 " state at the same time",
5814 errors.ECODE_INVAL)
5816 # Boolean value that tells us whether we might be demoting from MC
5817 self.might_demote = (self.op.master_candidate == False or
5818 self.op.offline == True or
5819 self.op.drained == True or
5820 self.op.master_capable == False)
5822 if self.op.secondary_ip:
5823 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5824 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5825 " address" % self.op.secondary_ip,
5828 self.lock_all = self.op.auto_promote and self.might_demote
5829 self.lock_instances = self.op.secondary_ip is not None
5831 def _InstanceFilter(self, instance):
5832 """Filter for getting affected instances.
5835 return (instance.disk_template in constants.DTS_INT_MIRROR and
5836 self.op.node_name in instance.all_nodes)
5838 def ExpandNames(self):
5839 if self.lock_all:
5840 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5841 else:
5842 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5844 # Since modifying a node can have severe effects on currently running
5845 # operations the resource lock is at least acquired in shared mode
5846 self.needed_locks[locking.LEVEL_NODE_RES] = \
5847 self.needed_locks[locking.LEVEL_NODE]
5849 # Get node resource and instance locks in shared mode; they are not used
5850 # for anything but read-only access
5851 self.share_locks[locking.LEVEL_NODE_RES] = 1
5852 self.share_locks[locking.LEVEL_INSTANCE] = 1
5854 if self.lock_instances:
5855 self.needed_locks[locking.LEVEL_INSTANCE] = \
5856 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5858 def BuildHooksEnv(self):
5861 This runs on the master node.
5865 "OP_TARGET": self.op.node_name,
5866 "MASTER_CANDIDATE": str(self.op.master_candidate),
5867 "OFFLINE": str(self.op.offline),
5868 "DRAINED": str(self.op.drained),
5869 "MASTER_CAPABLE": str(self.op.master_capable),
5870 "VM_CAPABLE": str(self.op.vm_capable),
5873 def BuildHooksNodes(self):
5874 """Build hooks nodes.
5877 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5878 return (nl, nl)
5880 def CheckPrereq(self):
5881 """Check prerequisites.
5883 This only checks the instance list against the existing names.
5886 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5888 if self.lock_instances:
5889 affected_instances = \
5890 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5892 # Verify instance locks
5893 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5894 wanted_instances = frozenset(affected_instances.keys())
5895 if wanted_instances - owned_instances:
5896 raise errors.OpPrereqError("Instances affected by changing node %s's"
5897 " secondary IP address have changed since"
5898 " locks were acquired, wanted '%s', have"
5899 " '%s'; retry the operation" %
5901 utils.CommaJoin(wanted_instances),
5902 utils.CommaJoin(owned_instances)),
5905 affected_instances = None
5907 if (self.op.master_candidate is not None or
5908 self.op.drained is not None or
5909 self.op.offline is not None):
5910 # we can't change the master's node flags
5911 if self.op.node_name == self.cfg.GetMasterNode():
5912 raise errors.OpPrereqError("The master role can be changed"
5913 " only via master-failover",
5916 if self.op.master_candidate and not node.master_capable:
5917 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5918 " it a master candidate" % node.name,
5921 if self.op.vm_capable == False:
5922 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5924 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5925 " the vm_capable flag" % node.name,
5928 if node.master_candidate and self.might_demote and not self.lock_all:
5929 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5930 # check if after removing the current node, we're missing master
5931 # candidates
5932 (mc_remaining, mc_should, _) = \
5933 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5934 if mc_remaining < mc_should:
5935 raise errors.OpPrereqError("Not enough master candidates, please"
5936 " pass auto promote option to allow"
5937 " promotion (--auto-promote or RAPI"
5938 " auto_promote=True)", errors.ECODE_STATE)
5940 self.old_flags = old_flags = (node.master_candidate,
5941 node.drained, node.offline)
5942 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5943 self.old_role = old_role = self._F2R[old_flags]
5945 # Check for ineffective changes
5946 for attr in self._FLAGS:
5947 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5948 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5949 setattr(self.op, attr, None)
5951 # Past this point, any flag change to False means a transition
5952 # away from the respective state, as only real changes are kept
5954 # TODO: We might query the real power state if it supports OOB
5955 if _SupportsOob(self.cfg, node):
5956 if self.op.offline is False and not (node.powered or
5957 self.op.powered == True):
5958 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5959 " offline status can be reset") %
5961 elif self.op.powered is not None:
5962 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5963 " as it does not support out-of-band"
5964 " handling") % self.op.node_name)
5966 # If we're being deofflined/drained, we'll MC ourself if needed
5967 if (self.op.drained == False or self.op.offline == False or
5968 (self.op.master_capable and not node.master_capable)):
5969 if _DecideSelfPromotion(self):
5970 self.op.master_candidate = True
5971 self.LogInfo("Auto-promoting node to master candidate")
5973 # If we're no longer master capable, we'll demote ourselves from MC
5974 if self.op.master_capable == False and node.master_candidate:
5975 self.LogInfo("Demoting from master candidate")
5976 self.op.master_candidate = False
5979 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5980 if self.op.master_candidate:
5981 new_role = self._ROLE_CANDIDATE
5982 elif self.op.drained:
5983 new_role = self._ROLE_DRAINED
5984 elif self.op.offline:
5985 new_role = self._ROLE_OFFLINE
5986 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5987 # False is still in new flags, which means we're un-setting (the
5988 # offline/drained/master-candidate) flag
5989 new_role = self._ROLE_REGULAR
5990 else: # no new flags, nothing, keep old role
5991 new_role = old_role
5993 self.new_role = new_role
5995 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5996 # Trying to transition out of offline status
5997 result = self.rpc.call_version([node.name])[node.name]
5998 if result.fail_msg:
5999 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6000 " to report its version: %s" %
6001 (node.name, result.fail_msg),
6002 errors.ECODE_ENVIRON)
6003 else:
6004 self.LogWarning("Transitioning node from offline to online state"
6005 " without using re-add. Please make sure the node"
6006 " is healthy!")
6008 # When changing the secondary ip, verify if this is a single-homed to
6009 # multi-homed transition or vice versa, and apply the relevant
6010 # warnings
6011 if self.op.secondary_ip:
6012 # Ok even without locking, because this can't be changed by any LU
6013 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6014 master_singlehomed = master.secondary_ip == master.primary_ip
6015 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6016 if self.op.force and node.name == master.name:
6017 self.LogWarning("Transitioning from single-homed to multi-homed"
6018 " cluster. All nodes will require a secondary ip.")
6020 raise errors.OpPrereqError("Changing the secondary ip on a"
6021 " single-homed cluster requires the"
6022 " --force option to be passed, and the"
6023 " target node to be the master",
6025 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6026 if self.op.force and node.name == master.name:
6027 self.LogWarning("Transitioning from multi-homed to single-homed"
6028 " cluster. Secondary IPs will have to be removed.")
6030 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6031 " same as the primary IP on a multi-homed"
6032 " cluster, unless the --force option is"
6033 " passed, and the target node is the"
6034 " master", errors.ECODE_INVAL)
6036 assert not (frozenset(affected_instances) -
6037 self.owned_locks(locking.LEVEL_INSTANCE))
6039 if node.offline:
6040 if affected_instances:
6041 raise errors.OpPrereqError("Cannot change secondary IP address:"
6042 " offline node has instances (%s)"
6043 " configured to use it" %
6044 utils.CommaJoin(affected_instances.keys()))
6045 else:
6046 # On online nodes, check that no instances are running, and that
6047 # the node has the new ip and we can reach it.
6048 for instance in affected_instances.values():
6049 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6050 msg="cannot change secondary ip")
6052 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6053 if master.name != node.name:
6054 # check reachability from master secondary ip to new secondary ip
6055 if not netutils.TcpPing(self.op.secondary_ip,
6056 constants.DEFAULT_NODED_PORT,
6057 source=master.secondary_ip):
6058 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6059 " based ping to node daemon port",
6060 errors.ECODE_ENVIRON)
6062 if self.op.ndparams:
6063 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6064 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6065 self.new_ndparams = new_ndparams
6067 if self.op.hv_state:
6068 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6069 self.node.hv_state_static)
6071 if self.op.disk_state:
6072 self.new_disk_state = \
6073 _MergeAndVerifyDiskState(self.op.disk_state,
6074 self.node.disk_state_static)
6076 def Exec(self, feedback_fn):
6077 """Modifies a node.
6079 """
6080 node = self.node
6081 old_role = self.old_role
6082 new_role = self.new_role
6084 result = []
6086 if self.op.ndparams:
6087 node.ndparams = self.new_ndparams
6089 if self.op.powered is not None:
6090 node.powered = self.op.powered
6092 if self.op.hv_state:
6093 node.hv_state_static = self.new_hv_state
6095 if self.op.disk_state:
6096 node.disk_state_static = self.new_disk_state
6098 for attr in ["master_capable", "vm_capable"]:
6099 val = getattr(self.op, attr)
6100 if val is not None:
6101 setattr(node, attr, val)
6102 result.append((attr, str(val)))
6104 if new_role != old_role:
6105 # Tell the node to demote itself, if no longer MC and not offline
6106 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6107 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6108 if msg:
6109 self.LogWarning("Node failed to demote itself: %s", msg)
6111 new_flags = self._R2F[new_role]
6112 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6113 if of != nf:
6114 result.append((desc, str(nf)))
6115 (node.master_candidate, node.drained, node.offline) = new_flags
6117 # we locked all nodes, we adjust the CP before updating this node
6118 if self.lock_all:
6119 _AdjustCandidatePool(self, [node.name])
6121 if self.op.secondary_ip:
6122 node.secondary_ip = self.op.secondary_ip
6123 result.append(("secondary_ip", self.op.secondary_ip))
6125 # this will trigger configuration file update, if needed
6126 self.cfg.Update(node, feedback_fn)
6128 # this will trigger job queue propagation or cleanup if the mc
6129 # flag changed
6130 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6131 self.context.ReaddNode(node)
6133 return result
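# Illustrative sketch (not part of the original module): the _F2R and _R2F
# tables defined in LUNodeSetParams above are built as exact inverses, so
# every (master_candidate, drained, offline) flag tuple round-trips through
# its role; the hypothetical self-check below demonstrates that invariant.
def _ExampleRoleFlagRoundTrip():
  for flags, role in LUNodeSetParams._F2R.items():
    # each flag tuple maps to exactly one role, and back to the same tuple
    assert LUNodeSetParams._R2F[role] == flags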
6136 class LUNodePowercycle(NoHooksLU):
6137 """Powercycles a node.
6142 def CheckArguments(self):
6143 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6144 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6145 raise errors.OpPrereqError("The node is the master and the force"
6146 " parameter was not set",
6149 def ExpandNames(self):
6150 """Locking for PowercycleNode.
6152 This is a last-resort option and shouldn't block on other
6153 jobs. Therefore, we grab no locks.
6156 self.needed_locks = {}
6158 def Exec(self, feedback_fn):
6162 result = self.rpc.call_node_powercycle(self.op.node_name,
6163 self.cfg.GetHypervisorType())
6164 result.Raise("Failed to schedule the reboot")
6165 return result.payload
6168 class LUClusterQuery(NoHooksLU):
6169 """Query cluster configuration.
6174 def ExpandNames(self):
6175 self.needed_locks = {}
6177 def Exec(self, feedback_fn):
6178 """Return cluster config.
6181 cluster = self.cfg.GetClusterInfo()
6183 os_hvp = {}
6184 # Filter just for enabled hypervisors
6185 for os_name, hv_dict in cluster.os_hvp.items():
6186 os_hvp[os_name] = {}
6187 for hv_name, hv_params in hv_dict.items():
6188 if hv_name in cluster.enabled_hypervisors:
6189 os_hvp[os_name][hv_name] = hv_params
6191 # Convert ip_family to ip_version
6192 primary_ip_version = constants.IP4_VERSION
6193 if cluster.primary_ip_family == netutils.IP6Address.family:
6194 primary_ip_version = constants.IP6_VERSION
6197 "software_version": constants.RELEASE_VERSION,
6198 "protocol_version": constants.PROTOCOL_VERSION,
6199 "config_version": constants.CONFIG_VERSION,
6200 "os_api_version": max(constants.OS_API_VERSIONS),
6201 "export_version": constants.EXPORT_VERSION,
6202 "architecture": runtime.GetArchInfo(),
6203 "name": cluster.cluster_name,
6204 "master": cluster.master_node,
6205 "default_hypervisor": cluster.primary_hypervisor,
6206 "enabled_hypervisors": cluster.enabled_hypervisors,
6207 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6208 for hypervisor_name in cluster.enabled_hypervisors]),
6210 "beparams": cluster.beparams,
6211 "osparams": cluster.osparams,
6212 "ipolicy": cluster.ipolicy,
6213 "nicparams": cluster.nicparams,
6214 "ndparams": cluster.ndparams,
6215 "diskparams": cluster.diskparams,
6216 "candidate_pool_size": cluster.candidate_pool_size,
6217 "master_netdev": cluster.master_netdev,
6218 "master_netmask": cluster.master_netmask,
6219 "use_external_mip_script": cluster.use_external_mip_script,
6220 "volume_group_name": cluster.volume_group_name,
6221 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6222 "file_storage_dir": cluster.file_storage_dir,
6223 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6224 "maintain_node_health": cluster.maintain_node_health,
6225 "ctime": cluster.ctime,
6226 "mtime": cluster.mtime,
6227 "uuid": cluster.uuid,
6228 "tags": list(cluster.GetTags()),
6229 "uid_pool": cluster.uid_pool,
6230 "default_iallocator": cluster.default_iallocator,
6231 "reserved_lvs": cluster.reserved_lvs,
6232 "primary_ip_version": primary_ip_version,
6233 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6234 "hidden_os": cluster.hidden_os,
6235 "blacklisted_os": cluster.blacklisted_os,
6241 class LUClusterConfigQuery(NoHooksLU):
6242 """Return configuration values.
6247 def CheckArguments(self):
6248 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6250 def ExpandNames(self):
6251 self.cq.ExpandNames(self)
6253 def DeclareLocks(self, level):
6254 self.cq.DeclareLocks(self, level)
6256 def Exec(self, feedback_fn):
6257 result = self.cq.OldStyleQuery(self)
6259 assert len(result) == 1
6261 return result[0]
6264 class _ClusterQuery(_QueryBase):
6265 FIELDS = query.CLUSTER_FIELDS
6267 #: Do not sort (there is only one item)
6268 SORT_FIELD = None
6270 def ExpandNames(self, lu):
6271 lu.needed_locks = {}
6273 # The following variables interact with _QueryBase._GetNames
6274 self.wanted = locking.ALL_SET
6275 self.do_locking = self.use_locking
6278 raise errors.OpPrereqError("Can not use locking for cluster queries",
6281 def DeclareLocks(self, lu, level):
6282 pass
6284 def _GetQueryData(self, lu):
6285 """Computes the list of nodes and their attributes.
6288 # Locking is not used
6289 assert not (compat.any(lu.glm.is_owned(level)
6290 for level in locking.LEVELS
6291 if level != locking.LEVEL_CLUSTER) or
6292 self.do_locking or self.use_locking)
6294 if query.CQ_CONFIG in self.requested_data:
6295 cluster = lu.cfg.GetClusterInfo()
6296 else:
6297 cluster = NotImplemented
6299 if query.CQ_QUEUE_DRAINED in self.requested_data:
6300 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6301 else:
6302 drain_flag = NotImplemented
6304 if query.CQ_WATCHER_PAUSE in self.requested_data:
6305 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6306 else:
6307 watcher_pause = NotImplemented
6309 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
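# Illustrative sketch (not part of the original module): _GetQueryData above
# uses NotImplemented as a "field not requested" sentinel, which stays
# distinguishable from a legitimate None value; the pattern generalizes as
# below ("fetch_fn" is a hypothetical zero-argument callable).
def _ExampleMaybeFetch(requested_data, flag, fetch_fn):
  if flag in requested_data:
    return fetch_fn()
  return NotImplemented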
6312 class LUInstanceActivateDisks(NoHooksLU):
6313 """Bring up an instance's disks.
6318 def ExpandNames(self):
6319 self._ExpandAndLockInstance()
6320 self.needed_locks[locking.LEVEL_NODE] = []
6321 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6323 def DeclareLocks(self, level):
6324 if level == locking.LEVEL_NODE:
6325 self._LockInstancesNodes()
6327 def CheckPrereq(self):
6328 """Check prerequisites.
6330 This checks that the instance is in the cluster.
6333 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6334 assert self.instance is not None, \
6335 "Cannot retrieve locked instance %s" % self.op.instance_name
6336 _CheckNodeOnline(self, self.instance.primary_node)
6338 def Exec(self, feedback_fn):
6339 """Activate the disks.
6342 disks_ok, disks_info = \
6343 _AssembleInstanceDisks(self, self.instance,
6344 ignore_size=self.op.ignore_size)
6346 raise errors.OpExecError("Cannot activate block devices")
6348 if self.op.wait_for_sync:
6349 if not _WaitForSync(self, self.instance):
6350 raise errors.OpExecError("Some disks of the instance are degraded!")
6355 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6356 ignore_size=False):
6357 """Prepare the block devices for an instance.
6359 This sets up the block devices on all nodes.
6361 @type lu: L{LogicalUnit}
6362 @param lu: the logical unit on whose behalf we execute
6363 @type instance: L{objects.Instance}
6364 @param instance: the instance for whose disks we assemble
6365 @type disks: list of L{objects.Disk} or None
6366 @param disks: which disks to assemble (or all, if None)
6367 @type ignore_secondaries: boolean
6368 @param ignore_secondaries: if true, errors on secondary nodes
6369 won't result in an error return from the function
6370 @type ignore_size: boolean
6371 @param ignore_size: if true, the current known size of the disk
6372 will not be used during the disk activation, useful for cases
6373 when the size is wrong
6374 @return: False if the operation failed, otherwise a list of
6375 (host, instance_visible_name, node_visible_name)
6376 with the mapping from node devices to instance devices
6379 device_info = []
6380 disks_ok = True
6381 iname = instance.name
6382 disks = _ExpandCheckDisks(instance, disks)
6384 # With the two passes mechanism we try to reduce the window of
6385 # opportunity for the race condition of switching DRBD to primary
6386 # before handshaking occurred, but we do not eliminate it
6388 # The proper fix would be to wait (with some limits) until the
6389 # connection has been made and drbd transitions from WFConnection
6390 # into any other network-connected state (Connected, SyncTarget,
6391 # SyncSource, etc.)
6393 # 1st pass, assemble on all nodes in secondary mode
6394 for idx, inst_disk in enumerate(disks):
6395 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6396 if ignore_size:
6397 node_disk = node_disk.Copy()
6398 node_disk.UnsetSize()
6399 lu.cfg.SetDiskID(node_disk, node)
6400 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6401 False, idx)
6402 msg = result.fail_msg
6403 if msg:
6404 is_offline_secondary = (node in instance.secondary_nodes and
6405 result.offline)
6406 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6407 " (is_primary=False, pass=1): %s",
6408 inst_disk.iv_name, node, msg)
6409 if not (ignore_secondaries or is_offline_secondary):
6410 disks_ok = False
6412 # FIXME: race condition on drbd migration to primary
6414 # 2nd pass, do only the primary node
6415 for idx, inst_disk in enumerate(disks):
6416 dev_path = None
6418 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6419 if node != instance.primary_node:
6420 continue
6421 if ignore_size:
6422 node_disk = node_disk.Copy()
6423 node_disk.UnsetSize()
6424 lu.cfg.SetDiskID(node_disk, node)
6425 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6426 True, idx)
6427 msg = result.fail_msg
6428 if msg:
6429 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6430 " (is_primary=True, pass=2): %s",
6431 inst_disk.iv_name, node, msg)
6432 disks_ok = False
6433 else:
6434 dev_path = result.payload
6436 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6438 # leave the disks configured for the primary node
6439 # this is a workaround that would be fixed better by
6440 # improving the logical/physical id handling
6441 for disk in disks:
6442 lu.cfg.SetDiskID(disk, instance.primary_node)
6444 return disks_ok, device_info
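# Illustrative sketch (not part of the original module): a hypothetical caller
# that wants a hard failure plus a readable device mapping could drive
# _AssembleInstanceDisks like this; "lu" and "instance" follow the types
# documented in the function above.
def _ExampleActivateAndReport(lu, instance):
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  # device_info is a list of (node, instance-visible name, node device path)
  for node, iv_name, dev_path in device_info:
    lu.LogInfo("Disk %s of instance %s visible on node %s as %s",
               iv_name, instance.name, node, dev_path)
  return device_info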
6447 def _StartInstanceDisks(lu, instance, force):
6448 """Start the disks of an instance.
6451 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6452 ignore_secondaries=force)
6453 if not disks_ok:
6454 _ShutdownInstanceDisks(lu, instance)
6455 if force is not None and not force:
6456 lu.proc.LogWarning("", hint="If the message above refers to a"
6457 " secondary node,"
6458 " you can retry the operation using '--force'.")
6459 raise errors.OpExecError("Disk consistency error")
6462 class LUInstanceDeactivateDisks(NoHooksLU):
6463 """Shutdown an instance's disks.
6468 def ExpandNames(self):
6469 self._ExpandAndLockInstance()
6470 self.needed_locks[locking.LEVEL_NODE] = []
6471 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6473 def DeclareLocks(self, level):
6474 if level == locking.LEVEL_NODE:
6475 self._LockInstancesNodes()
6477 def CheckPrereq(self):
6478 """Check prerequisites.
6480 This checks that the instance is in the cluster.
6483 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6484 assert self.instance is not None, \
6485 "Cannot retrieve locked instance %s" % self.op.instance_name
6487 def Exec(self, feedback_fn):
6488 """Deactivate the disks
6491 instance = self.instance
6492 if self.op.force:
6493 _ShutdownInstanceDisks(self, instance)
6494 else:
6495 _SafeShutdownInstanceDisks(self, instance)
6498 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6499 """Shutdown block devices of an instance.
6501 This function checks if an instance is running, before calling
6502 _ShutdownInstanceDisks.
6505 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6506 _ShutdownInstanceDisks(lu, instance, disks=disks)
6509 def _ExpandCheckDisks(instance, disks):
6510 """Return the instance disks selected by the disks list
6512 @type disks: list of L{objects.Disk} or None
6513 @param disks: selected disks
6514 @rtype: list of L{objects.Disk}
6515 @return: selected instance disks to act on
6518 if disks is None:
6519 return instance.disks
6520 else:
6521 if not set(disks).issubset(instance.disks):
6522 raise errors.ProgrammerError("Can only act on disks belonging to the"
6523 " given instance")
6525 return disks
6527 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6528 """Shutdown block devices of an instance.
6530 This does the shutdown on all nodes of the instance.
6532 If the ignore_primary is false, errors on the primary node are
6533 ignored.
6535 """
6536 all_result = True
6537 disks = _ExpandCheckDisks(instance, disks)
6539 for disk in disks:
6540 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6541 lu.cfg.SetDiskID(top_disk, node)
6542 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6543 msg = result.fail_msg
6544 if msg:
6545 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6546 disk.iv_name, node, msg)
6547 if ((node == instance.primary_node and not ignore_primary) or
6548 (node != instance.primary_node and not result.offline)):
6549 all_result = False
6551 return all_result
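# Illustrative sketch (not part of the original module): since
# _ShutdownInstanceDisks returns False when a node that matters failed, a
# hypothetical caller treating that as fatal can wrap it as follows.
def _ExampleShutdownOrRaise(lu, instance):
  if not _ShutdownInstanceDisks(lu, instance):
    raise errors.OpExecError("Can't shutdown block devices for instance %s" %
                             instance.name)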
6553 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6554 """Checks if a node has enough free memory.
6556 This function checks if a given node has the needed amount of free
6557 memory. In case the node has less memory or we cannot get the
6558 information from the node, this function raises an OpPrereqError
6559 exception.
6561 @type lu: C{LogicalUnit}
6562 @param lu: a logical unit from which we get configuration data
6564 @param node: the node to check
6565 @type reason: C{str}
6566 @param reason: string to use in the error message
6567 @type requested: C{int}
6568 @param requested: the amount of memory in MiB to check for
6569 @type hypervisor_name: C{str}
6570 @param hypervisor_name: the hypervisor to ask for memory stats
6572 @return: node current free memory
6573 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6574 we cannot check the node
6577 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6578 nodeinfo[node].Raise("Can't get data from node %s" % node,
6579 prereq=True, ecode=errors.ECODE_ENVIRON)
6580 (_, _, (hv_info, )) = nodeinfo[node].payload
6582 free_mem = hv_info.get("memory_free", None)
6583 if not isinstance(free_mem, int):
6584 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6585 " was '%s'" % (node, free_mem),
6586 errors.ECODE_ENVIRON)
6587 if requested > free_mem:
6588 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6589 " needed %s MiB, available %s MiB" %
6590 (node, reason, requested, free_mem),
6591 errors.ECODE_NORES)
6593 return free_mem
6595 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6596 """Checks if nodes have enough free disk space in the all VGs.
6598 This function checks if all given nodes have the needed amount of
6599 free disk. In case any node has less disk or we cannot get the
6600 information from the node, this function raises an OpPrereqError
6601 exception.
6603 @type lu: C{LogicalUnit}
6604 @param lu: a logical unit from which we get configuration data
6605 @type nodenames: C{list}
6606 @param nodenames: the list of node names to check
6607 @type req_sizes: C{dict}
6608 @param req_sizes: the hash of vg and corresponding amount of disk in
6610 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6611 or we cannot check the node
6614 for vg, req_size in req_sizes.items():
6615 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
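# Illustrative sketch (not part of the original module): req_sizes maps a
# volume-group name to the MiB required in it, so a hypothetical two-VG
# requirement (the VG names are made up) would be checked as below.
def _ExampleCheckTwoVgs(lu, nodenames):
  _CheckNodesFreeDiskPerVG(lu, nodenames, {"xenvg": 2048, "fastvg": 512})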
6618 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6619 """Checks if nodes have enough free disk space in the specified VG.
6621 This function checks if all given nodes have the needed amount of
6622 free disk. In case any node has less disk or we cannot get the
6623 information from the node, this function raises an OpPrereqError
6624 exception.
6626 @type lu: C{LogicalUnit}
6627 @param lu: a logical unit from which we get configuration data
6628 @type nodenames: C{list}
6629 @param nodenames: the list of node names to check
6631 @param vg: the volume group to check
6632 @type requested: C{int}
6633 @param requested: the amount of disk in MiB to check for
6634 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6635 or we cannot check the node
6638 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6639 for node in nodenames:
6640 info = nodeinfo[node]
6641 info.Raise("Cannot get current information from node %s" % node,
6642 prereq=True, ecode=errors.ECODE_ENVIRON)
6643 (_, (vg_info, ), _) = info.payload
6644 vg_free = vg_info.get("vg_free", None)
6645 if not isinstance(vg_free, int):
6646 raise errors.OpPrereqError("Can't compute free disk space on node"
6647 " %s for vg %s, result was '%s'" %
6648 (node, vg, vg_free), errors.ECODE_ENVIRON)
6649 if requested > vg_free:
6650 raise errors.OpPrereqError("Not enough disk space on target node %s"
6651 " vg %s: required %d MiB, available %d MiB" %
6652 (node, vg, requested, vg_free),
6653 errors.ECODE_NORES)
6656 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6657 """Checks if nodes have enough physical CPUs
6659 This function checks if all given nodes have the needed number of
6660 physical CPUs. In case any node has less CPUs or we cannot get the
6661 information from the node, this function raises an OpPrereqError
6662 exception.
6664 @type lu: C{LogicalUnit}
6665 @param lu: a logical unit from which we get configuration data
6666 @type nodenames: C{list}
6667 @param nodenames: the list of node names to check
6668 @type requested: C{int}
6669 @param requested: the minimum acceptable number of physical CPUs
6670 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6671 or we cannot check the node
6674 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6675 for node in nodenames:
6676 info = nodeinfo[node]
6677 info.Raise("Cannot get current information from node %s" % node,
6678 prereq=True, ecode=errors.ECODE_ENVIRON)
6679 (_, _, (hv_info, )) = info.payload
6680 num_cpus = hv_info.get("cpu_total", None)
6681 if not isinstance(num_cpus, int):
6682 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6683 " on node %s, result was '%s'" %
6684 (node, num_cpus), errors.ECODE_ENVIRON)
6685 if requested > num_cpus:
6686 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6687 "required" % (node, num_cpus, requested),
6691 class LUInstanceStartup(LogicalUnit):
6692 """Starts an instance.
6695 HPATH = "instance-start"
6696 HTYPE = constants.HTYPE_INSTANCE
6699 def CheckArguments(self):
6701 if self.op.beparams:
6702 # fill the beparams dict
6703 objects.UpgradeBeParams(self.op.beparams)
6704 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6706 def ExpandNames(self):
6707 self._ExpandAndLockInstance()
6708 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6710 def DeclareLocks(self, level):
6711 if level == locking.LEVEL_NODE_RES:
6712 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6714 def BuildHooksEnv(self):
6717 This runs on master, primary and secondary nodes of the instance.
6721 "FORCE": self.op.force,
6724 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6728 def BuildHooksNodes(self):
6729 """Build hooks nodes.
6732 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6733 return (nl, nl)
6735 def CheckPrereq(self):
6736 """Check prerequisites.
6738 This checks that the instance is in the cluster.
6741 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6742 assert self.instance is not None, \
6743 "Cannot retrieve locked instance %s" % self.op.instance_name
6746 if self.op.hvparams:
6747 # check hypervisor parameter syntax (locally)
6748 cluster = self.cfg.GetClusterInfo()
6749 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6750 filled_hvp = cluster.FillHV(instance)
6751 filled_hvp.update(self.op.hvparams)
6752 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6753 hv_type.CheckParameterSyntax(filled_hvp)
6754 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6756 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6758 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6760 if self.primary_offline and self.op.ignore_offline_nodes:
6761 self.proc.LogWarning("Ignoring offline primary node")
6763 if self.op.hvparams or self.op.beparams:
6764 self.proc.LogWarning("Overridden parameters are ignored")
6765 else:
6766 _CheckNodeOnline(self, instance.primary_node)
6768 bep = self.cfg.GetClusterInfo().FillBE(instance)
6769 bep.update(self.op.beparams)
6771 # check bridges existence
6772 _CheckInstanceBridgesExist(self, instance)
6774 remote_info = self.rpc.call_instance_info(instance.primary_node,
6775 instance.name,
6776 instance.hypervisor)
6777 remote_info.Raise("Error checking node %s" % instance.primary_node,
6778 prereq=True, ecode=errors.ECODE_ENVIRON)
6779 if not remote_info.payload: # not running already
6780 _CheckNodeFreeMemory(self, instance.primary_node,
6781 "starting instance %s" % instance.name,
6782 bep[constants.BE_MINMEM], instance.hypervisor)
6784 def Exec(self, feedback_fn):
6785 """Start the instance.
6788 instance = self.instance
6789 force = self.op.force
6791 if not self.op.no_remember:
6792 self.cfg.MarkInstanceUp(instance.name)
6794 if self.primary_offline:
6795 assert self.op.ignore_offline_nodes
6796 self.proc.LogInfo("Primary node offline, marked instance as started")
6797 else:
6798 node_current = instance.primary_node
6800 _StartInstanceDisks(self, instance, force)
6802 result = \
6803 self.rpc.call_instance_start(node_current,
6804 (instance, self.op.hvparams,
6805 self.op.beparams),
6806 self.op.startup_paused)
6807 msg = result.fail_msg
6808 if msg:
6809 _ShutdownInstanceDisks(self, instance)
6810 raise errors.OpExecError("Could not start instance: %s" % msg)
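# Illustrative sketch (not part of the original module): the parameter
# layering used in LUInstanceStartup.CheckPrereq above - cluster and instance
# defaults are filled first, then the one-shot opcode overrides win; the
# helper name is made up.
def _ExampleEffectiveBeParams(cluster, instance, op_beparams):
  bep = cluster.FillBE(instance)  # cluster + instance-level defaults
  bep.update(op_beparams or {})   # per-operation overrides take precedence
  return bep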
6813 class LUInstanceReboot(LogicalUnit):
6814 """Reboot an instance.
6817 HPATH = "instance-reboot"
6818 HTYPE = constants.HTYPE_INSTANCE
6821 def ExpandNames(self):
6822 self._ExpandAndLockInstance()
6824 def BuildHooksEnv(self):
6827 This runs on master, primary and secondary nodes of the instance.
6831 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6832 "REBOOT_TYPE": self.op.reboot_type,
6833 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6836 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6840 def BuildHooksNodes(self):
6841 """Build hooks nodes.
6844 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6845 return (nl, nl)
6847 def CheckPrereq(self):
6848 """Check prerequisites.
6850 This checks that the instance is in the cluster.
6853 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6854 assert self.instance is not None, \
6855 "Cannot retrieve locked instance %s" % self.op.instance_name
6856 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6857 _CheckNodeOnline(self, instance.primary_node)
6859 # check bridges existence
6860 _CheckInstanceBridgesExist(self, instance)
6862 def Exec(self, feedback_fn):
6863 """Reboot the instance.
6866 instance = self.instance
6867 ignore_secondaries = self.op.ignore_secondaries
6868 reboot_type = self.op.reboot_type
6870 remote_info = self.rpc.call_instance_info(instance.primary_node,
6871 instance.name,
6872 instance.hypervisor)
6873 remote_info.Raise("Error checking node %s" % instance.primary_node)
6874 instance_running = bool(remote_info.payload)
6876 node_current = instance.primary_node
6878 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6879 constants.INSTANCE_REBOOT_HARD]:
6880 for disk in instance.disks:
6881 self.cfg.SetDiskID(disk, node_current)
6882 result = self.rpc.call_instance_reboot(node_current, instance,
6883 reboot_type,
6884 self.op.shutdown_timeout)
6885 result.Raise("Could not reboot instance")
6887 if instance_running:
6888 result = self.rpc.call_instance_shutdown(node_current, instance,
6889 self.op.shutdown_timeout)
6890 result.Raise("Could not shutdown instance for full reboot")
6891 _ShutdownInstanceDisks(self, instance)
6893 self.LogInfo("Instance %s was already stopped, starting now",
6895 _StartInstanceDisks(self, instance, ignore_secondaries)
6896 result = self.rpc.call_instance_start(node_current,
6897 (instance, None, None), False)
6898 msg = result.fail_msg
6900 _ShutdownInstanceDisks(self, instance)
6901 raise errors.OpExecError("Could not start instance for"
6902 " full reboot: %s" % msg)
6904 self.cfg.MarkInstanceUp(instance.name)
6907 class LUInstanceShutdown(LogicalUnit):
6908 """Shutdown an instance.
6911 HPATH = "instance-stop"
6912 HTYPE = constants.HTYPE_INSTANCE
6915 def ExpandNames(self):
6916 self._ExpandAndLockInstance()
6918 def BuildHooksEnv(self):
6921 This runs on master, primary and secondary nodes of the instance.
6924 env = _BuildInstanceHookEnvByObject(self, self.instance)
6925 env["TIMEOUT"] = self.op.timeout
6928 def BuildHooksNodes(self):
6929 """Build hooks nodes.
6932 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6933 return (nl, nl)
6935 def CheckPrereq(self):
6936 """Check prerequisites.
6938 This checks that the instance is in the cluster.
6941 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6942 assert self.instance is not None, \
6943 "Cannot retrieve locked instance %s" % self.op.instance_name
6945 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6947 self.primary_offline = \
6948 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6950 if self.primary_offline and self.op.ignore_offline_nodes:
6951 self.proc.LogWarning("Ignoring offline primary node")
6953 _CheckNodeOnline(self, self.instance.primary_node)
6955 def Exec(self, feedback_fn):
6956 """Shutdown the instance.
6959 instance = self.instance
6960 node_current = instance.primary_node
6961 timeout = self.op.timeout
6963 if not self.op.no_remember:
6964 self.cfg.MarkInstanceDown(instance.name)
6966 if self.primary_offline:
6967 assert self.op.ignore_offline_nodes
6968 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6970 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6971 msg = result.fail_msg
6973 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6975 _ShutdownInstanceDisks(self, instance)
6978 class LUInstanceReinstall(LogicalUnit):
6979 """Reinstall an instance.
6982 HPATH = "instance-reinstall"
6983 HTYPE = constants.HTYPE_INSTANCE
6986 def ExpandNames(self):
6987 self._ExpandAndLockInstance()
6989 def BuildHooksEnv(self):
6992 This runs on master, primary and secondary nodes of the instance.
6995 return _BuildInstanceHookEnvByObject(self, self.instance)
6997 def BuildHooksNodes(self):
6998 """Build hooks nodes.
7001 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7002 return (nl, nl)
7004 def CheckPrereq(self):
7005 """Check prerequisites.
7007 This checks that the instance is in the cluster and is not running.
7010 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7011 assert instance is not None, \
7012 "Cannot retrieve locked instance %s" % self.op.instance_name
7013 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7014 " offline, cannot reinstall")
7016 if instance.disk_template == constants.DT_DISKLESS:
7017 raise errors.OpPrereqError("Instance '%s' has no disks" %
7018 self.op.instance_name,
7019 errors.ECODE_INVAL)
7020 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7022 if self.op.os_type is not None:
7023 # OS verification
7024 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7025 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7026 instance_os = self.op.os_type
7027 else:
7028 instance_os = instance.os
7030 nodelist = list(instance.all_nodes)
7032 if self.op.osparams:
7033 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7034 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7035 self.os_inst = i_osdict # the new dict (without defaults)
7036 else:
7037 self.os_inst = {}
7039 self.instance = instance
7041 def Exec(self, feedback_fn):
7042 """Reinstall the instance.
7045 inst = self.instance
7047 if self.op.os_type is not None:
7048 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7049 inst.os = self.op.os_type
7050 # Write to configuration
7051 self.cfg.Update(inst, feedback_fn)
7053 _StartInstanceDisks(self, inst, None)
7054 try:
7055 feedback_fn("Running the instance OS create scripts...")
7056 # FIXME: pass debug option from opcode to backend
7057 result = self.rpc.call_instance_os_add(inst.primary_node,
7058 (inst, self.os_inst), True,
7059 self.op.debug_level)
7060 result.Raise("Could not install OS for instance %s on node %s" %
7061 (inst.name, inst.primary_node))
7062 finally:
7063 _ShutdownInstanceDisks(self, inst)
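# Illustrative sketch (not part of the original module): the
# activate/act/deactivate pattern used by LUInstanceReinstall.Exec above,
# generalized; "act_fn" is a hypothetical callable run while disks are up.
def _ExampleWithActiveDisks(lu, instance, act_fn):
  _StartInstanceDisks(lu, instance, None)
  try:
    return act_fn(instance)
  finally:
    # disks are always shut down again, even if act_fn raised
    _ShutdownInstanceDisks(lu, instance)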
7066 class LUInstanceRecreateDisks(LogicalUnit):
7067 """Recreate an instance's missing disks.
7070 HPATH = "instance-recreate-disks"
7071 HTYPE = constants.HTYPE_INSTANCE
7074 _MODIFYABLE = frozenset([
7075 constants.IDISK_SIZE,
7076 constants.IDISK_MODE,
7079 # New or changed disk parameters may have different semantics
7080 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7081 constants.IDISK_ADOPT,
7083 # TODO: Implement support changing VG while recreating
7085 constants.IDISK_METAVG,
7088 def CheckArguments(self):
7089 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7090 # Normalize and convert deprecated list of disk indices
7091 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7093 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7094 if duplicates:
7095 raise errors.OpPrereqError("Some disks have been specified more than"
7096 " once: %s" % utils.CommaJoin(duplicates),
7097 errors.ECODE_INVAL)
7099 for (idx, params) in self.op.disks:
7100 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7101 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7102 if unsupported:
7103 raise errors.OpPrereqError("Parameters for disk %s try to change"
7104 " unmodifyable parameter(s): %s" %
7105 (idx, utils.CommaJoin(unsupported)),
7106 errors.ECODE_INVAL)
7108 def ExpandNames(self):
7109 self._ExpandAndLockInstance()
7110 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7111 if self.op.nodes:
7112 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7113 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7114 else:
7115 self.needed_locks[locking.LEVEL_NODE] = []
7116 self.needed_locks[locking.LEVEL_NODE_RES] = []
7118 def DeclareLocks(self, level):
7119 if level == locking.LEVEL_NODE:
7120 # if we replace the nodes, we only need to lock the old primary,
7121 # otherwise we need to lock all nodes for disk re-creation
7122 primary_only = bool(self.op.nodes)
7123 self._LockInstancesNodes(primary_only=primary_only)
7124 elif level == locking.LEVEL_NODE_RES:
7126 self.needed_locks[locking.LEVEL_NODE_RES] = \
7127 self.needed_locks[locking.LEVEL_NODE][:]
7129 def BuildHooksEnv(self):
7132 This runs on master, primary and secondary nodes of the instance.
7135 return _BuildInstanceHookEnvByObject(self, self.instance)
7137 def BuildHooksNodes(self):
7138 """Build hooks nodes.
7141 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7144 def CheckPrereq(self):
7145 """Check prerequisites.
7147 This checks that the instance is in the cluster and is not running.
7150 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7151 assert instance is not None, \
7152 "Cannot retrieve locked instance %s" % self.op.instance_name
7153 if self.op.nodes:
7154 if len(self.op.nodes) != len(instance.all_nodes):
7155 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7156 " %d replacement nodes were specified" %
7157 (instance.name, len(instance.all_nodes),
7158 len(self.op.nodes)),
7159 errors.ECODE_INVAL)
7160 assert instance.disk_template != constants.DT_DRBD8 or \
7161 len(self.op.nodes) == 2
7162 assert instance.disk_template != constants.DT_PLAIN or \
7163 len(self.op.nodes) == 1
7164 primary_node = self.op.nodes[0]
7165 else:
7166 primary_node = instance.primary_node
7167 _CheckNodeOnline(self, primary_node)
7169 if instance.disk_template == constants.DT_DISKLESS:
7170 raise errors.OpPrereqError("Instance '%s' has no disks" %
7171 self.op.instance_name, errors.ECODE_INVAL)
7173 # if we replace nodes *and* the old primary is offline, we don't
7174 # check
7175 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7176 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7177 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7178 if not (self.op.nodes and old_pnode.offline):
7179 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7180 msg="cannot recreate disks")
7182 if self.op.disks:
7183 self.disks = dict(self.op.disks)
7184 else:
7185 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7187 maxidx = max(self.disks.keys())
7188 if maxidx >= len(instance.disks):
7189 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7192 if (self.op.nodes and
7193 sorted(self.disks.keys()) != range(len(instance.disks))):
7194 raise errors.OpPrereqError("Can't recreate disks partially and"
7195 " change the nodes at the same time",
7198 self.instance = instance
7200 def Exec(self, feedback_fn):
7201 """Recreate the disks.
7204 instance = self.instance
7206 assert (self.owned_locks(locking.LEVEL_NODE) ==
7207 self.owned_locks(locking.LEVEL_NODE_RES))
7209 to_skip = []
7210 mods = [] # keeps track of needed changes
7212 for idx, disk in enumerate(instance.disks):
7213 try:
7214 changes = self.disks[idx]
7215 except KeyError:
7216 # Disk should not be recreated
7217 to_skip.append(idx)
7218 continue
7220 # update secondaries for disks, if needed
7221 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7222 # need to update the nodes and minors
7223 assert len(self.op.nodes) == 2
7224 assert len(disk.logical_id) == 6 # otherwise disk internals
7226 (_, _, old_port, _, _, old_secret) = disk.logical_id
7227 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7228 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7229 new_minors[0], new_minors[1], old_secret)
7230 assert len(disk.logical_id) == len(new_id)
7231 else:
7232 new_id = None
7234 mods.append((idx, new_id, changes))
7236 # now that we have passed all asserts above, we can apply the mods
7237 # in a single run (to avoid partial changes)
7238 for idx, new_id, changes in mods:
7239 disk = instance.disks[idx]
7240 if new_id is not None:
7241 assert disk.dev_type == constants.LD_DRBD8
7242 disk.logical_id = new_id
7243 if changes:
7244 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7245 mode=changes.get(constants.IDISK_MODE, None))
7247 # change primary node, if needed
7248 if self.op.nodes:
7249 instance.primary_node = self.op.nodes[0]
7250 self.LogWarning("Changing the instance's nodes, you will have to"
7251 " remove any disks left on the older nodes manually")
7253 if self.op.nodes:
7254 self.cfg.Update(instance, feedback_fn)
7256 _CreateDisks(self, instance, to_skip=to_skip)
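# Illustrative sketch (not part of the original module): rebuilding a DRBD8
# logical_id as done in LUInstanceRecreateDisks.Exec above - the TCP port and
# shared secret are kept, while node names and minors are replaced; the
# helper name is made up.
def _ExampleNewDrbdLogicalId(old_id, new_nodes, new_minors):
  (_, _, old_port, _, _, old_secret) = old_id
  return (new_nodes[0], new_nodes[1], old_port,
          new_minors[0], new_minors[1], old_secret)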
7259 class LUInstanceRename(LogicalUnit):
7260 """Rename an instance.
7263 HPATH = "instance-rename"
7264 HTYPE = constants.HTYPE_INSTANCE
7266 def CheckArguments(self):
7270 if self.op.ip_check and not self.op.name_check:
7271 # TODO: make the ip check more flexible and not depend on the name check
7272 raise errors.OpPrereqError("IP address check requires a name check",
7275 def BuildHooksEnv(self):
7278 This runs on master, primary and secondary nodes of the instance.
7281 env = _BuildInstanceHookEnvByObject(self, self.instance)
7282 env["INSTANCE_NEW_NAME"] = self.op.new_name
7285 def BuildHooksNodes(self):
7286 """Build hooks nodes.
7289 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7290 return (nl, nl)
7292 def CheckPrereq(self):
7293 """Check prerequisites.
7295 This checks that the instance is in the cluster and is not running.
7298 self.op.instance_name = _ExpandInstanceName(self.cfg,
7299 self.op.instance_name)
7300 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7301 assert instance is not None
7302 _CheckNodeOnline(self, instance.primary_node)
7303 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7304 msg="cannot rename")
7305 self.instance = instance
7307 new_name = self.op.new_name
7308 if self.op.name_check:
7309 hostname = netutils.GetHostname(name=new_name)
7310 if hostname.name != new_name:
7311 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7313 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7314 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7315 " same as given hostname '%s'") %
7316 (hostname.name, self.op.new_name),
7317 errors.ECODE_NOTUNIQUE)
7318 new_name = self.op.new_name = hostname.name
7319 if (self.op.ip_check and
7320 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7321 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7322 (hostname.ip, new_name),
7323 errors.ECODE_NOTUNIQUE)
7325 instance_list = self.cfg.GetInstanceList()
7326 if new_name in instance_list and new_name != instance.name:
7327 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7328 new_name, errors.ECODE_EXISTS)
7330 def Exec(self, feedback_fn):
7331 """Rename the instance.
7334 inst = self.instance
7335 old_name = inst.name
7337 rename_file_storage = False
7338 if (inst.disk_template in constants.DTS_FILEBASED and
7339 self.op.new_name != inst.name):
7340 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7341 rename_file_storage = True
7343 self.cfg.RenameInstance(inst.name, self.op.new_name)
7344 # Change the instance lock. This is definitely safe while we hold the BGL.
7345 # Otherwise the new lock would have to be added in acquired mode.
7347 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7348 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7350 # re-read the instance from the configuration after rename
7351 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7353 if rename_file_storage:
7354 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7355 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7356 old_file_storage_dir,
7357 new_file_storage_dir)
7358 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7359 " (but the instance has been renamed in Ganeti)" %
7360 (inst.primary_node, old_file_storage_dir,
7361 new_file_storage_dir))
7363 _StartInstanceDisks(self, inst, None)
7364 try:
7365 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7366 old_name, self.op.debug_level)
7367 msg = result.fail_msg
7368 if msg:
7369 msg = ("Could not run OS rename script for instance %s on node %s"
7370 " (but the instance has been renamed in Ganeti): %s" %
7371 (inst.name, inst.primary_node, msg))
7372 self.proc.LogWarning(msg)
7373 finally:
7374 _ShutdownInstanceDisks(self, inst)
7376 return inst.name
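# Illustrative sketch (not part of the original module): for file-based disk
# templates the storage directory is derived from the first disk's
# logical_id, as done twice in LUInstanceRename.Exec above.
def _ExampleFileStorageDir(instance):
  return os.path.dirname(instance.disks[0].logical_id[1])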
7379 class LUInstanceRemove(LogicalUnit):
7380 """Remove an instance.
7383 HPATH = "instance-remove"
7384 HTYPE = constants.HTYPE_INSTANCE
7387 def ExpandNames(self):
7388 self._ExpandAndLockInstance()
7389 self.needed_locks[locking.LEVEL_NODE] = []
7390 self.needed_locks[locking.LEVEL_NODE_RES] = []
7391 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7393 def DeclareLocks(self, level):
7394 if level == locking.LEVEL_NODE:
7395 self._LockInstancesNodes()
7396 elif level == locking.LEVEL_NODE_RES:
7398 self.needed_locks[locking.LEVEL_NODE_RES] = \
7399 self.needed_locks[locking.LEVEL_NODE][:]
7401 def BuildHooksEnv(self):
7404 This runs on master, primary and secondary nodes of the instance.
7407 env = _BuildInstanceHookEnvByObject(self, self.instance)
7408 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7411 def BuildHooksNodes(self):
7412 """Build hooks nodes.
7415 nl = [self.cfg.GetMasterNode()]
7416 nl_post = list(self.instance.all_nodes) + nl
7417 return (nl, nl_post)
7419 def CheckPrereq(self):
7420 """Check prerequisites.
7422 This checks that the instance is in the cluster.
7425 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7426 assert self.instance is not None, \
7427 "Cannot retrieve locked instance %s" % self.op.instance_name
7429 def Exec(self, feedback_fn):
7430 """Remove the instance.
7433 instance = self.instance
7434 logging.info("Shutting down instance %s on node %s",
7435 instance.name, instance.primary_node)
7437 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7438 self.op.shutdown_timeout)
7439 msg = result.fail_msg
7440 if msg:
7441 if self.op.ignore_failures:
7442 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7443 else:
7444 raise errors.OpExecError("Could not shutdown instance %s on"
7445 " node %s: %s" %
7446 (instance.name, instance.primary_node, msg))
7448 assert (self.owned_locks(locking.LEVEL_NODE) ==
7449 self.owned_locks(locking.LEVEL_NODE_RES))
7450 assert not (set(instance.all_nodes) -
7451 self.owned_locks(locking.LEVEL_NODE)), \
7452 "Not owning correct locks"
7454 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7457 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7458 """Utility function to remove an instance.
7461 logging.info("Removing block devices for instance %s", instance.name)
7463 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7464 if not ignore_failures:
7465 raise errors.OpExecError("Can't remove instance's disks")
7466 feedback_fn("Warning: can't remove instance's disks")
7468 logging.info("Removing instance %s out of cluster config", instance.name)
7470 lu.cfg.RemoveInstance(instance.name)
7472 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7473 "Instance lock removal conflict"
7475 # Remove lock for the instance
7476 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7479 class LUInstanceQuery(NoHooksLU):
7480 """Logical unit for querying instances.
7483 # pylint: disable=W0142
7486 def CheckArguments(self):
7487 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7488 self.op.output_fields, self.op.use_locking)
7490 def ExpandNames(self):
7491 self.iq.ExpandNames(self)
7493 def DeclareLocks(self, level):
7494 self.iq.DeclareLocks(self, level)
7496 def Exec(self, feedback_fn):
7497 return self.iq.OldStyleQuery(self)
7500 class LUInstanceFailover(LogicalUnit):
7501 """Failover an instance.
7504 HPATH = "instance-failover"
7505 HTYPE = constants.HTYPE_INSTANCE
7508 def CheckArguments(self):
7509 """Check the arguments.
7512 self.iallocator = getattr(self.op, "iallocator", None)
7513 self.target_node = getattr(self.op, "target_node", None)
7515 def ExpandNames(self):
7516 self._ExpandAndLockInstance()
7518 if self.op.target_node is not None:
7519 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7521 self.needed_locks[locking.LEVEL_NODE] = []
7522 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7524 self.needed_locks[locking.LEVEL_NODE_RES] = []
7525 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7527 ignore_consistency = self.op.ignore_consistency
7528 shutdown_timeout = self.op.shutdown_timeout
7529 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7530 cleanup=False,
7531 failover=True,
7532 ignore_consistency=ignore_consistency,
7533 shutdown_timeout=shutdown_timeout,
7534 ignore_ipolicy=self.op.ignore_ipolicy)
7535 self.tasklets = [self._migrater]
7537 def DeclareLocks(self, level):
7538 if level == locking.LEVEL_NODE:
7539 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7540 if instance.disk_template in constants.DTS_EXT_MIRROR:
7541 if self.op.target_node is None:
7542 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7543 else:
7544 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7545 self.op.target_node]
7546 del self.recalculate_locks[locking.LEVEL_NODE]
7547 else:
7548 self._LockInstancesNodes()
7549 elif level == locking.LEVEL_NODE_RES:
7551 self.needed_locks[locking.LEVEL_NODE_RES] = \
7552 self.needed_locks[locking.LEVEL_NODE][:]
7554 def BuildHooksEnv(self):
7557 This runs on master, primary and secondary nodes of the instance.
7560 instance = self._migrater.instance
7561 source_node = instance.primary_node
7562 target_node = self.op.target_node
7564 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7565 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7566 "OLD_PRIMARY": source_node,
7567 "NEW_PRIMARY": target_node,
7570 if instance.disk_template in constants.DTS_INT_MIRROR:
7571 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7572 env["NEW_SECONDARY"] = source_node
7574 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7576 env.update(_BuildInstanceHookEnvByObject(self, instance))
7580 def BuildHooksNodes(self):
7581 """Build hooks nodes.
7584 instance = self._migrater.instance
7585 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7586 return (nl, nl + [instance.primary_node])
7589 class LUInstanceMigrate(LogicalUnit):
7590 """Migrate an instance.
7592 This is migration without shutting down, compared to the failover,
7593 which is done with shutdown.
7596 HPATH = "instance-migrate"
7597 HTYPE = constants.HTYPE_INSTANCE
7600 def ExpandNames(self):
7601 self._ExpandAndLockInstance()
7603 if self.op.target_node is not None:
7604 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7606 self.needed_locks[locking.LEVEL_NODE] = []
7607 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7609 self.needed_locks[locking.LEVEL_NODE_RES] = []
7610 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7612 self._migrater = \
7613 TLMigrateInstance(self, self.op.instance_name,
7614 cleanup=self.op.cleanup,
7615 failover=False,
7616 fallback=self.op.allow_failover,
7617 allow_runtime_changes=self.op.allow_runtime_changes,
7618 ignore_ipolicy=self.op.ignore_ipolicy)
7619 self.tasklets = [self._migrater]
7621 def DeclareLocks(self, level):
7622 if level == locking.LEVEL_NODE:
7623 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7624 if instance.disk_template in constants.DTS_EXT_MIRROR:
7625 if self.op.target_node is None:
7626 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7627 else:
7628 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7629 self.op.target_node]
7630 del self.recalculate_locks[locking.LEVEL_NODE]
7631 else:
7632 self._LockInstancesNodes()
7633 elif level == locking.LEVEL_NODE_RES:
7635 self.needed_locks[locking.LEVEL_NODE_RES] = \
7636 self.needed_locks[locking.LEVEL_NODE][:]
7638 def BuildHooksEnv(self):
7641 This runs on master, primary and secondary nodes of the instance.
7644 instance = self._migrater.instance
7645 source_node = instance.primary_node
7646 target_node = self.op.target_node
7647 env = _BuildInstanceHookEnvByObject(self, instance)
7648 env.update({
7649 "MIGRATE_LIVE": self._migrater.live,
7650 "MIGRATE_CLEANUP": self.op.cleanup,
7651 "OLD_PRIMARY": source_node,
7652 "NEW_PRIMARY": target_node,
7653 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7654 })
7656 if instance.disk_template in constants.DTS_INT_MIRROR:
7657 env["OLD_SECONDARY"] = target_node
7658 env["NEW_SECONDARY"] = source_node
7659 else:
7660 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7662 return env
7664 def BuildHooksNodes(self):
7665 """Build hooks nodes.
7668 instance = self._migrater.instance
7669 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7670 return (nl, nl + [instance.primary_node])
7673 class LUInstanceMove(LogicalUnit):
7674 """Move an instance by data-copying.
7677 HPATH = "instance-move"
7678 HTYPE = constants.HTYPE_INSTANCE
7681 def ExpandNames(self):
7682 self._ExpandAndLockInstance()
7683 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7684 self.op.target_node = target_node
7685 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7686 self.needed_locks[locking.LEVEL_NODE_RES] = []
7687 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7689 def DeclareLocks(self, level):
7690 if level == locking.LEVEL_NODE:
7691 self._LockInstancesNodes(primary_only=True)
7692 elif level == locking.LEVEL_NODE_RES:
7694 self.needed_locks[locking.LEVEL_NODE_RES] = \
7695 self.needed_locks[locking.LEVEL_NODE][:]
7697 def BuildHooksEnv(self):
7700 This runs on master, primary and secondary nodes of the instance.
7704 "TARGET_NODE": self.op.target_node,
7705 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7707 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7710 def BuildHooksNodes(self):
7711 """Build hooks nodes.
7714 nl = [
7715 self.cfg.GetMasterNode(),
7716 self.instance.primary_node,
7717 self.op.target_node,
7718 ]
7719 return (nl, nl)
7721 def CheckPrereq(self):
7722 """Check prerequisites.
7724 This checks that the instance is in the cluster.
7727 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7728 assert self.instance is not None, \
7729 "Cannot retrieve locked instance %s" % self.op.instance_name
7731 node = self.cfg.GetNodeInfo(self.op.target_node)
7732 assert node is not None, \
7733 "Cannot retrieve locked node %s" % self.op.target_node
7735 self.target_node = target_node = node.name
7737 if target_node == instance.primary_node:
7738 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7739 (instance.name, target_node),
7742 bep = self.cfg.GetClusterInfo().FillBE(instance)
7744 for idx, dsk in enumerate(instance.disks):
7745 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7746 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7747 " cannot copy" % idx, errors.ECODE_STATE)
7749 _CheckNodeOnline(self, target_node)
7750 _CheckNodeNotDrained(self, target_node)
7751 _CheckNodeVmCapable(self, target_node)
7752 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7753 self.cfg.GetNodeGroup(node.group))
7754 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7755 ignore=self.op.ignore_ipolicy)
7757 if instance.admin_state == constants.ADMINST_UP:
7758 # check memory requirements on the secondary node
7759 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7760 instance.name, bep[constants.BE_MAXMEM],
7761 instance.hypervisor)
7762 else:
7763 self.LogInfo("Not checking memory on the secondary node as"
7764 " instance will not be started")
7766 # check bridge existence
7767 _CheckInstanceBridgesExist(self, instance, node=target_node)
7769 def Exec(self, feedback_fn):
7770 """Move an instance.
7772 The move is done by shutting it down on its present node, copying
7773 the data over (slow) and starting it on the new node.
7776 instance = self.instance
7778 source_node = instance.primary_node
7779 target_node = self.target_node
7781 self.LogInfo("Shutting down instance %s on source node %s",
7782 instance.name, source_node)
7784 assert (self.owned_locks(locking.LEVEL_NODE) ==
7785 self.owned_locks(locking.LEVEL_NODE_RES))
7787 result = self.rpc.call_instance_shutdown(source_node, instance,
7788 self.op.shutdown_timeout)
7789 msg = result.fail_msg
7790 if msg:
7791 if self.op.ignore_consistency:
7792 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7793 " Proceeding anyway. Please make sure node"
7794 " %s is down. Error details: %s",
7795 instance.name, source_node, source_node, msg)
7796 else:
7797 raise errors.OpExecError("Could not shutdown instance %s on"
7798 " node %s: %s" %
7799 (instance.name, source_node, msg))
7801 # create the target disks
7802 try:
7803 _CreateDisks(self, instance, target_node=target_node)
7804 except errors.OpExecError:
7805 self.LogWarning("Device creation failed, reverting...")
7806 try:
7807 _RemoveDisks(self, instance, target_node=target_node)
7808 finally:
7809 self.cfg.ReleaseDRBDMinors(instance.name)
7810 raise
7812 cluster_name = self.cfg.GetClusterInfo().cluster_name
7814 errs = []
7815 # activate, get path, copy the data over
7816 for idx, disk in enumerate(instance.disks):
7817 self.LogInfo("Copying data for disk %d", idx)
7818 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7819 instance.name, True, idx)
7820 if result.fail_msg:
7821 self.LogWarning("Can't assemble newly created disk %d: %s",
7822 idx, result.fail_msg)
7823 errs.append(result.fail_msg)
7824 break
7825 dev_path = result.payload
7826 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7827 target_node, dev_path,
7828 cluster_name)
7829 if result.fail_msg:
7830 self.LogWarning("Can't copy data over for disk %d: %s",
7831 idx, result.fail_msg)
7832 errs.append(result.fail_msg)
7833 break
7835 if errs:
7836 self.LogWarning("Some disks failed to copy, aborting")
7837 try:
7838 _RemoveDisks(self, instance, target_node=target_node)
7839 finally:
7840 self.cfg.ReleaseDRBDMinors(instance.name)
7841 raise errors.OpExecError("Errors during disk copy: %s" %
7842 (",".join(errs),))
7844 instance.primary_node = target_node
7845 self.cfg.Update(instance, feedback_fn)
7847 self.LogInfo("Removing the disks on the original node")
7848 _RemoveDisks(self, instance, target_node=source_node)
7850 # Only start the instance if it's marked as up
7851 if instance.admin_state == constants.ADMINST_UP:
7852 self.LogInfo("Starting instance %s on node %s",
7853 instance.name, target_node)
7855 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7856 ignore_secondaries=True)
7857 if not disks_ok:
7858 _ShutdownInstanceDisks(self, instance)
7859 raise errors.OpExecError("Can't activate the instance's disks")
7861 result = self.rpc.call_instance_start(target_node,
7862 (instance, None, None), False)
7863 msg = result.fail_msg
7864 if msg:
7865 _ShutdownInstanceDisks(self, instance)
7866 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7867 (instance.name, target_node, msg))
7870 class LUNodeMigrate(LogicalUnit):
7871 """Migrate all instances from a node.
7874 HPATH = "node-migrate"
7875 HTYPE = constants.HTYPE_NODE
7878 def CheckArguments(self):
7879 pass
7881 def ExpandNames(self):
7882 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7884 self.share_locks = _ShareAll()
7885 self.needed_locks = {
7886 locking.LEVEL_NODE: [self.op.node_name],
7887 }
7889 def BuildHooksEnv(self):
7892 This runs on the master, the primary and all the secondaries.
7896 "NODE_NAME": self.op.node_name,
7897 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7900 def BuildHooksNodes(self):
7901 """Build hooks nodes.
7904 nl = [self.cfg.GetMasterNode()]
7905 return (nl, nl)
7907 def CheckPrereq(self):
7908 pass
7910 def Exec(self, feedback_fn):
7911 # Prepare jobs for migration instances
7912 allow_runtime_changes = self.op.allow_runtime_changes
7913 jobs = [
7914 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7915 mode=self.op.mode,
7916 live=self.op.live,
7917 iallocator=self.op.iallocator,
7918 target_node=self.op.target_node,
7919 allow_runtime_changes=allow_runtime_changes,
7920 ignore_ipolicy=self.op.ignore_ipolicy)]
7921 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
7924 # TODO: Run iallocator in this opcode and pass correct placement options to
7925 # OpInstanceMigrate. Since other jobs can modify the cluster between
7926 # running the iallocator and the actual migration, a good consistency model
7927 # will have to be found.
7929 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7930 frozenset([self.op.node_name]))
7932 return ResultWithJobs(jobs)
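# Illustrative sketch (added commentary, not in the original source): for a
# node with two primary instances "inst1" and "inst2", the list built above
# has the shape
#
#   jobs = [[OpInstanceMigrate(instance_name="inst1", ...)],
#           [OpInstanceMigrate(instance_name="inst2", ...)]]
#
# i.e. one single-opcode job per instance, so each migration is submitted
# as an independent job that the master can schedule separately.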
7935 class TLMigrateInstance(Tasklet):
7936 """Tasklet class for instance migration.
7939 @ivar live: whether the migration will be done live or non-live;
7940 this variable is initialized only after CheckPrereq has run
7941 @type cleanup: boolean
7942 @ivar cleanup: Whether we are cleaning up after a failed migration
7943 @type iallocator: string
7944 @ivar iallocator: The iallocator used to determine target_node
7945 @type target_node: string
7946 @ivar target_node: If given, the target node to reallocate the instance to
7947 @type failover: boolean
7948 @ivar failover: Whether the operation results in failover or migration
7949 @type fallback: boolean
7950 @ivar fallback: Whether fallback to failover is allowed if migration is
7951 not possible
7952 @type ignore_consistency: boolean
7953 @ivar ignore_consistency: Whether we should ignore consistency between
7954 source and target node
7955 @type shutdown_timeout: int
7956 @ivar shutdown_timeout: In case of failover, the timeout to use for the shutdown
7957 @type ignore_ipolicy: bool
7958 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7963 _MIGRATION_POLL_INTERVAL = 1 # seconds
7964 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7966 def __init__(self, lu, instance_name, cleanup=False,
7967 failover=False, fallback=False,
7968 ignore_consistency=False,
7969 allow_runtime_changes=True,
7970 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7971 ignore_ipolicy=False):
7972 """Initializes this class.
7975 Tasklet.__init__(self, lu)
7978 self.instance_name = instance_name
7979 self.cleanup = cleanup
7980 self.live = False # will be overridden later
7981 self.failover = failover
7982 self.fallback = fallback
7983 self.ignore_consistency = ignore_consistency
7984 self.shutdown_timeout = shutdown_timeout
7985 self.ignore_ipolicy = ignore_ipolicy
7986 self.allow_runtime_changes = allow_runtime_changes
7988 def CheckPrereq(self):
7989 """Check prerequisites.
7991 This checks that the instance is in the cluster.
7994 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7995 instance = self.cfg.GetInstanceInfo(instance_name)
7996 assert instance is not None
7997 self.instance = instance
7998 cluster = self.cfg.GetClusterInfo()
8000 if (not self.cleanup and
8001 not instance.admin_state == constants.ADMINST_UP and
8002 not self.failover and self.fallback):
8003 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8004 " switching to failover")
8005 self.failover = True
8007 if instance.disk_template not in constants.DTS_MIRRORED:
8008 if self.failover:
8009 text = "failover"
8010 else:
8011 text = "migration"
8012 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8013 " %s" % (instance.disk_template, text),
8014 errors.ECODE_STATE)
8016 if instance.disk_template in constants.DTS_EXT_MIRROR:
8017 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8019 if self.lu.op.iallocator:
8020 self._RunAllocator()
8021 else:
8022 # We set self.target_node here, as it is required by
8023 # BuildHooksEnv
8024 self.target_node = self.lu.op.target_node
8026 # Check that the target node is correct in terms of instance policy
8027 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8028 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8029 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8030 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8031 ignore=self.ignore_ipolicy)
8033 # self.target_node is already populated, either directly or by the
8034 # iallocator run
8035 target_node = self.target_node
8036 if self.target_node == instance.primary_node:
8037 raise errors.OpPrereqError("Cannot migrate instance %s"
8038 " to its primary (%s)" %
8039 (instance.name, instance.primary_node))
8041 if len(self.lu.tasklets) == 1:
8042 # It is safe to release locks only when we're the only tasklet
8043 # in the LU
8044 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8045 keep=[instance.primary_node, self.target_node])
8047 else:
8048 secondary_nodes = instance.secondary_nodes
8049 if not secondary_nodes:
8050 raise errors.ConfigurationError("No secondary node but using"
8051 " %s disk template" %
8052 instance.disk_template)
8053 target_node = secondary_nodes[0]
8054 if self.lu.op.iallocator or (self.lu.op.target_node and
8055 self.lu.op.target_node != target_node):
8056 if self.failover:
8057 text = "failed over"
8058 else:
8059 text = "migrated"
8060 raise errors.OpPrereqError("Instances with disk template %s cannot"
8061 " be %s to arbitrary nodes"
8062 " (neither an iallocator nor a target"
8063 " node can be passed)" %
8064 (instance.disk_template, text),
8065 errors.ECODE_INVAL)
8066 nodeinfo = self.cfg.GetNodeInfo(target_node)
8067 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8068 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8069 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8070 ignore=self.ignore_ipolicy)
8072 i_be = cluster.FillBE(instance)
8074 # check memory requirements on the secondary node
8075 if (not self.cleanup and
8076 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8077 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8078 "migrating instance %s" %
8080 i_be[constants.BE_MINMEM],
8081 instance.hypervisor)
8083 self.lu.LogInfo("Not checking memory on the secondary node as"
8084 " instance will not be started")
8086 # check if failover must be forced instead of migration
8087 if (not self.cleanup and not self.failover and
8088 i_be[constants.BE_ALWAYS_FAILOVER]):
8089 if self.fallback:
8090 self.lu.LogInfo("Instance configured to always failover; fallback"
8091 " to failover")
8092 self.failover = True
8093 else:
8094 raise errors.OpPrereqError("This instance has been configured to"
8095 " always failover, please allow failover",
8096 errors.ECODE_STATE)
8098 # check bridge existence
8099 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8101 if not self.cleanup:
8102 _CheckNodeNotDrained(self.lu, target_node)
8103 if not self.failover:
8104 result = self.rpc.call_instance_migratable(instance.primary_node,
8105 instance)
8106 if result.fail_msg and self.fallback:
8107 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8108 " failover")
8109 self.failover = True
8110 else:
8111 result.Raise("Can't migrate, please use failover",
8112 prereq=True, ecode=errors.ECODE_STATE)
8114 assert not (self.failover and self.cleanup)
8116 if not self.failover:
8117 if self.lu.op.live is not None and self.lu.op.mode is not None:
8118 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8119 " parameters are accepted",
8121 if self.lu.op.live is not None:
8123 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8125 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8126 # reset the 'live' parameter to None so that repeated
8127 # invocations of CheckPrereq do not raise an exception
8128 self.lu.op.live = None
8129 elif self.lu.op.mode is None:
8130 # read the default value from the hypervisor
8131 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8132 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8134 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8135 else:
8136 # Failover is never live
8137 self.live = False
8139 if not (self.failover or self.cleanup):
8140 remote_info = self.rpc.call_instance_info(instance.primary_node,
8141 instance.name,
8142 instance.hypervisor)
8143 remote_info.Raise("Error checking instance on node %s" %
8144 instance.primary_node)
8145 instance_running = bool(remote_info.payload)
8146 if instance_running:
8147 self.current_mem = int(remote_info.payload["memory"])
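# Added commentary (not in the original source): the mode resolution above
# follows three rules: an explicit op.live=True/False maps to
# HT_MIGRATION_LIVE/HT_MIGRATION_NONLIVE, an explicit op.mode is used as
# given, and otherwise the hypervisor's HV_MIGRATION_MODE default applies;
# self.current_mem is recorded so that Exec can later balloon the instance
# down if the target node is short on free memory.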
8149 def _RunAllocator(self):
8150 """Run the allocator based on input opcode.
8153 # FIXME: add a self.ignore_ipolicy option
8154 ial = IAllocator(self.cfg, self.rpc,
8155 mode=constants.IALLOCATOR_MODE_RELOC,
8156 name=self.instance_name,
8157 relocate_from=[self.instance.primary_node],
8158 )
8160 ial.Run(self.lu.op.iallocator)
8162 if not ial.success:
8163 raise errors.OpPrereqError("Can't compute nodes using"
8164 " iallocator '%s': %s" %
8165 (self.lu.op.iallocator, ial.info),
8166 errors.ECODE_NORES)
8167 if len(ial.result) != ial.required_nodes:
8168 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8169 " of nodes (%s), required %s" %
8170 (self.lu.op.iallocator, len(ial.result),
8171 ial.required_nodes), errors.ECODE_FAULT)
8172 self.target_node = ial.result[0]
8173 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8174 self.instance_name, self.lu.op.iallocator,
8175 utils.CommaJoin(ial.result))
8177 def _WaitUntilSync(self):
8178 """Poll with custom rpc for disk sync.
8180 This uses our own step-based rpc call.
8183 self.feedback_fn("* wait until resync is done")
8184 all_done = False
8185 while not all_done:
8186 all_done = True
8187 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8188 self.nodes_ip,
8189 (self.instance.disks,
8190 self.instance))
8191 min_percent = 100
8192 for node, nres in result.items():
8193 nres.Raise("Cannot resync disks on node %s" % node)
8194 node_done, node_percent = nres.payload
8195 all_done = all_done and node_done
8196 if node_percent is not None:
8197 min_percent = min(min_percent, node_percent)
8198 if not all_done:
8199 if min_percent < 100:
8200 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8201 time.sleep(2)
8203 def _EnsureSecondary(self, node):
8204 """Demote a node to secondary.
8207 self.feedback_fn("* switching node %s to secondary mode" % node)
8209 for dev in self.instance.disks:
8210 self.cfg.SetDiskID(dev, node)
8212 result = self.rpc.call_blockdev_close(node, self.instance.name,
8213 self.instance.disks)
8214 result.Raise("Cannot change disk to secondary on node %s" % node)
8216 def _GoStandalone(self):
8217 """Disconnect from the network.
8220 self.feedback_fn("* changing into standalone mode")
8221 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8222 self.instance.disks)
8223 for node, nres in result.items():
8224 nres.Raise("Cannot disconnect disks node %s" % node)
8226 def _GoReconnect(self, multimaster):
8227 """Reconnect to the network.
8233 msg = "single-master"
8234 self.feedback_fn("* changing disks into %s mode" % msg)
8235 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8236 (self.instance.disks, self.instance),
8237 self.instance.name, multimaster)
8238 for node, nres in result.items():
8239 nres.Raise("Cannot change disks config on node %s" % node)
8241 def _ExecCleanup(self):
8242 """Try to cleanup after a failed migration.
8244 The cleanup is done by:
8245 - check that the instance is running only on one node
8246 (and update the config if needed)
8247 - change disks on its secondary node to secondary
8248 - wait until disks are fully synchronized
8249 - disconnect from the network
8250 - change disks into single-master mode
8251 - wait again until disks are fully synchronized
8253 """
8254 instance = self.instance
8255 target_node = self.target_node
8256 source_node = self.source_node
8258 # check running on only one node
8259 self.feedback_fn("* checking where the instance actually runs"
8260 " (if this hangs, the hypervisor might be in"
8262 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8263 for node, result in ins_l.items():
8264 result.Raise("Can't contact node %s" % node)
8266 runningon_source = instance.name in ins_l[source_node].payload
8267 runningon_target = instance.name in ins_l[target_node].payload
8269 if runningon_source and runningon_target:
8270 raise errors.OpExecError("Instance seems to be running on two nodes,"
8271 " or the hypervisor is confused; you will have"
8272 " to ensure manually that it runs only on one"
8273 " and restart this operation")
8275 if not (runningon_source or runningon_target):
8276 raise errors.OpExecError("Instance does not seem to be running at all;"
8277 " in this case it's safer to repair by"
8278 " running 'gnt-instance stop' to ensure disk"
8279 " shutdown, and then restarting it")
8281 if runningon_target:
8282 # the migration has actually succeeded, we need to update the config
8283 self.feedback_fn("* instance running on secondary node (%s),"
8284 " updating config" % target_node)
8285 instance.primary_node = target_node
8286 self.cfg.Update(instance, self.feedback_fn)
8287 demoted_node = source_node
8288 else:
8289 self.feedback_fn("* instance confirmed to be running on its"
8290 " primary node (%s)" % source_node)
8291 demoted_node = target_node
8293 if instance.disk_template in constants.DTS_INT_MIRROR:
8294 self._EnsureSecondary(demoted_node)
8295 try:
8296 self._WaitUntilSync()
8297 except errors.OpExecError:
8298 # we ignore here errors, since if the device is standalone, it
8299 # won't be able to sync
8300 pass
8301 self._GoStandalone()
8302 self._GoReconnect(False)
8303 self._WaitUntilSync()
8305 self.feedback_fn("* done")
8307 def _RevertDiskStatus(self):
8308 """Try to revert the disk status after a failed migration.
8311 target_node = self.target_node
8312 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8313 return
8315 try:
8316 self._EnsureSecondary(target_node)
8317 self._GoStandalone()
8318 self._GoReconnect(False)
8319 self._WaitUntilSync()
8320 except errors.OpExecError, err:
8321 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8322 " please try to recover the instance manually;"
8323 " error '%s'" % str(err))
8325 def _AbortMigration(self):
8326 """Call the hypervisor code to abort a started migration.
8329 instance = self.instance
8330 target_node = self.target_node
8331 source_node = self.source_node
8332 migration_info = self.migration_info
8334 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8335 instance,
8336 migration_info,
8337 False)
8338 abort_msg = abort_result.fail_msg
8339 if abort_msg:
8340 logging.error("Aborting migration failed on target node %s: %s",
8341 target_node, abort_msg)
8342 # Don't raise an exception here, as we still have to try to revert the
8343 # disk status, even if this step failed.
8345 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8346 instance, False, self.live)
8347 abort_msg = abort_result.fail_msg
8348 if abort_msg:
8349 logging.error("Aborting migration failed on source node %s: %s",
8350 source_node, abort_msg)
8352 def _ExecMigration(self):
8353 """Migrate an instance.
8355 The migrate is done by:
8356 - change the disks into dual-master mode
8357 - wait until disks are fully synchronized again
8358 - migrate the instance
8359 - change disks on the new secondary node (the old primary) to secondary
8360 - wait until disks are fully synchronized
8361 - change disks into single-master mode
8363 """
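# Added commentary (not in the original source): for internally mirrored
# (DRBD) templates, live migration needs the disks writable on both nodes
# at the same time, which is why the code below temporarily reconnects the
# DRBD pair in dual-master ("multimaster") mode and only switches back to
# single-master once the memory transfer has completed.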
8364 instance = self.instance
8365 target_node = self.target_node
8366 source_node = self.source_node
8368 # Check for hypervisor version mismatch and warn the user.
8369 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8370 None, [self.instance.hypervisor])
8371 for ninfo in nodeinfo.values():
8372 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8373 ninfo.node)
8374 (_, _, (src_info, )) = nodeinfo[source_node].payload
8375 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8377 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8378 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8379 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8380 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8381 if src_version != dst_version:
8382 self.feedback_fn("* warning: hypervisor version mismatch between"
8383 " source (%s) and target (%s) node" %
8384 (src_version, dst_version))
8386 self.feedback_fn("* checking disk consistency between source and target")
8387 for (idx, dev) in enumerate(instance.disks):
8388 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8389 raise errors.OpExecError("Disk %s is degraded or not fully"
8390 " synchronized on target node,"
8391 " aborting migration" % idx)
8393 if self.current_mem > self.tgt_free_mem:
8394 if not self.allow_runtime_changes:
8395 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8396 " free memory to fit instance %s on target"
8397 " node %s (have %dMB, need %dMB)" %
8398 (instance.name, target_node,
8399 self.tgt_free_mem, self.current_mem))
8400 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8401 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8402 instance,
8403 self.tgt_free_mem)
8404 rpcres.Raise("Cannot modify instance runtime memory")
8406 # First get the migration information from the remote node
8407 result = self.rpc.call_migration_info(source_node, instance)
8408 msg = result.fail_msg
8409 if msg:
8410 log_err = ("Failed fetching source migration information from %s: %s" %
8411 (source_node, msg))
8412 logging.error(log_err)
8413 raise errors.OpExecError(log_err)
8415 self.migration_info = migration_info = result.payload
8417 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8418 # Then switch the disks to master/master mode
8419 self._EnsureSecondary(target_node)
8420 self._GoStandalone()
8421 self._GoReconnect(True)
8422 self._WaitUntilSync()
8424 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8425 result = self.rpc.call_accept_instance(target_node,
8426 instance,
8427 migration_info,
8428 self.nodes_ip[target_node])
8430 msg = result.fail_msg
8431 if msg:
8432 logging.error("Instance pre-migration failed, trying to revert"
8433 " disk status: %s", msg)
8434 self.feedback_fn("Pre-migration failed, aborting")
8435 self._AbortMigration()
8436 self._RevertDiskStatus()
8437 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8438 (instance.name, msg))
8440 self.feedback_fn("* migrating instance to %s" % target_node)
8441 result = self.rpc.call_instance_migrate(source_node, instance,
8442 self.nodes_ip[target_node],
8443 self.live)
8444 msg = result.fail_msg
8445 if msg:
8446 logging.error("Instance migration failed, trying to revert"
8447 " disk status: %s", msg)
8448 self.feedback_fn("Migration failed, aborting")
8449 self._AbortMigration()
8450 self._RevertDiskStatus()
8451 raise errors.OpExecError("Could not migrate instance %s: %s" %
8452 (instance.name, msg))
8454 self.feedback_fn("* starting memory transfer")
8455 last_feedback = time.time()
8456 while True:
8457 result = self.rpc.call_instance_get_migration_status(source_node,
8458 instance)
8459 msg = result.fail_msg
8460 ms = result.payload # MigrationStatus instance
8461 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8462 logging.error("Instance migration failed, trying to revert"
8463 " disk status: %s", msg)
8464 self.feedback_fn("Migration failed, aborting")
8465 self._AbortMigration()
8466 self._RevertDiskStatus()
8467 raise errors.OpExecError("Could not migrate instance %s: %s" %
8468 (instance.name, msg))
8470 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8471 self.feedback_fn("* memory transfer complete")
8472 break
8474 if (utils.TimeoutExpired(last_feedback,
8475 self._MIGRATION_FEEDBACK_INTERVAL) and
8476 ms.transferred_ram is not None):
8477 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8478 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8479 last_feedback = time.time()
8481 time.sleep(self._MIGRATION_POLL_INTERVAL)
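# Illustrative numbers (added, not in the original source): with
# ms.transferred_ram == 1536 and ms.total_ram == 4096, the loop above
# reports "* memory transfer progress: 37.50 %", at most once every
# _MIGRATION_FEEDBACK_INTERVAL seconds.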
8483 result = self.rpc.call_instance_finalize_migration_src(source_node,
8484 instance,
8485 True,
8486 self.live)
8487 msg = result.fail_msg
8488 if msg:
8489 logging.error("Instance migration succeeded, but finalization failed"
8490 " on the source node: %s", msg)
8491 raise errors.OpExecError("Could not finalize instance migration: %s" %
8492 msg)
8494 instance.primary_node = target_node
8496 # distribute new instance config to the other nodes
8497 self.cfg.Update(instance, self.feedback_fn)
8499 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8500 instance,
8501 migration_info,
8502 True)
8503 msg = result.fail_msg
8504 if msg:
8505 logging.error("Instance migration succeeded, but finalization failed"
8506 " on the target node: %s", msg)
8507 raise errors.OpExecError("Could not finalize instance migration: %s" %
8508 msg)
8510 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8511 self._EnsureSecondary(source_node)
8512 self._WaitUntilSync()
8513 self._GoStandalone()
8514 self._GoReconnect(False)
8515 self._WaitUntilSync()
8517 # If the instance's disk template is `rbd' and there was a successful
8518 # migration, unmap the device from the source node.
8519 if self.instance.disk_template == constants.DT_RBD:
8520 disks = _ExpandCheckDisks(instance, instance.disks)
8521 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8522 for disk in disks:
8523 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8524 msg = result.fail_msg
8525 if msg:
8526 logging.error("Migration was successful, but couldn't unmap the"
8527 " block device %s on source node %s: %s",
8528 disk.iv_name, source_node, msg)
8529 logging.error("You need to unmap the device %s manually on %s",
8530 disk.iv_name, source_node)
8532 self.feedback_fn("* done")
8534 def _ExecFailover(self):
8535 """Failover an instance.
8537 The failover is done by shutting it down on its present node and
8538 starting it on the secondary.
8541 instance = self.instance
8542 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8544 source_node = instance.primary_node
8545 target_node = self.target_node
8547 if instance.admin_state == constants.ADMINST_UP:
8548 self.feedback_fn("* checking disk consistency between source and target")
8549 for (idx, dev) in enumerate(instance.disks):
8550 # for drbd, these are drbd over lvm
8551 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8552 False):
8553 if primary_node.offline:
8554 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8555 " target node %s" %
8556 (primary_node.name, idx, target_node))
8557 elif not self.ignore_consistency:
8558 raise errors.OpExecError("Disk %s is degraded on target node,"
8559 " aborting failover" % idx)
8560 else:
8561 self.feedback_fn("* not checking disk consistency as instance is not"
8562 " running")
8564 self.feedback_fn("* shutting down instance on source node")
8565 logging.info("Shutting down instance %s on node %s",
8566 instance.name, source_node)
8568 result = self.rpc.call_instance_shutdown(source_node, instance,
8569 self.shutdown_timeout)
8570 msg = result.fail_msg
8571 if msg:
8572 if self.ignore_consistency or primary_node.offline:
8573 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8574 " proceeding anyway; please make sure node"
8575 " %s is down; error details: %s",
8576 instance.name, source_node, source_node, msg)
8577 else:
8578 raise errors.OpExecError("Could not shutdown instance %s on"
8579 " node %s: %s" %
8580 (instance.name, source_node, msg))
8582 self.feedback_fn("* deactivating the instance's disks on source node")
8583 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8584 raise errors.OpExecError("Can't shut down the instance's disks")
8586 instance.primary_node = target_node
8587 # distribute new instance config to the other nodes
8588 self.cfg.Update(instance, self.feedback_fn)
8590 # Only start the instance if it's marked as up
8591 if instance.admin_state == constants.ADMINST_UP:
8592 self.feedback_fn("* activating the instance's disks on target node %s" %
8593 target_node)
8594 logging.info("Starting instance %s on node %s",
8595 instance.name, target_node)
8597 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8598 ignore_secondaries=True)
8599 if not disks_ok:
8600 _ShutdownInstanceDisks(self.lu, instance)
8601 raise errors.OpExecError("Can't activate the instance's disks")
8603 self.feedback_fn("* starting the instance on the target node %s" %
8604 target_node)
8605 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8606 False)
8607 msg = result.fail_msg
8608 if msg:
8609 _ShutdownInstanceDisks(self.lu, instance)
8610 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8611 (instance.name, target_node, msg))
8613 def Exec(self, feedback_fn):
8614 """Perform the migration.
8617 self.feedback_fn = feedback_fn
8618 self.source_node = self.instance.primary_node
8620 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8621 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8622 self.target_node = self.instance.secondary_nodes[0]
8623 # Otherwise self.target_node has been populated either
8624 # directly, or through an iallocator.
8626 self.all_nodes = [self.source_node, self.target_node]
8627 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8628 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8630 if self.failover:
8631 feedback_fn("Failover instance %s" % self.instance.name)
8632 self._ExecFailover()
8633 else:
8634 feedback_fn("Migrating instance %s" % self.instance.name)
8636 if self.cleanup:
8637 return self._ExecCleanup()
8638 else:
8639 return self._ExecMigration()
8642 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8643 force_open):
8644 """Wrapper around L{_CreateBlockDevInner}.
8646 This method annotates the root device first.
8648 """
8649 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8650 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8651 force_open)
8654 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8655 info, force_open):
8656 """Create a tree of block devices on a given node.
8658 If this device type has to be created on secondaries, create it and
8659 all its children.
8661 If not, just recurse to children keeping the same 'force' value.
8663 @attention: The device has to be annotated already.
8665 @param lu: the lu on whose behalf we execute
8666 @param node: the node on which to create the device
8667 @type instance: L{objects.Instance}
8668 @param instance: the instance which owns the device
8669 @type device: L{objects.Disk}
8670 @param device: the device to create
8671 @type force_create: boolean
8672 @param force_create: whether to force creation of this device; this
8673 will be changed to True whenever we find a device which has
8674 CreateOnSecondary() attribute
8675 @param info: the extra 'metadata' we should attach to the device
8676 (this will be represented as a LVM tag)
8677 @type force_open: boolean
8678 @param force_open: this parameter will be passed to the
8679 L{backend.BlockdevCreate} function where it specifies
8680 whether we run on primary or not, and it affects both
8681 the child assembly and the device's own Open() execution
8684 if device.CreateOnSecondary():
8685 force_create = True
8687 if device.children:
8688 for child in device.children:
8689 _CreateBlockDevInner(lu, node, instance, child, force_create,
8690 info, force_open)
8692 if not force_create:
8693 return
8695 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
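# Illustrative walk-through (added commentary, not in the original source):
# for a DRBD8 device with two LV children, CreateOnSecondary() is true for
# DRBD, so force_create flips to True first; the recursion then creates
# both backing LVs with that forced value, and finally the DRBD8 device
# itself is created via _CreateSingleBlockDev.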
8698 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8699 """Create a single block device on a given node.
8701 This will not recurse over children of the device, so they must be
8702 created in advance.
8704 @param lu: the lu on whose behalf we execute
8705 @param node: the node on which to create the device
8706 @type instance: L{objects.Instance}
8707 @param instance: the instance which owns the device
8708 @type device: L{objects.Disk}
8709 @param device: the device to create
8710 @param info: the extra 'metadata' we should attach to the device
8711 (this will be represented as a LVM tag)
8712 @type force_open: boolean
8713 @param force_open: this parameter will be passed to the
8714 L{backend.BlockdevCreate} function where it specifies
8715 whether we run on primary or not, and it affects both
8716 the child assembly and the device's own Open() execution
8719 lu.cfg.SetDiskID(device, node)
8720 result = lu.rpc.call_blockdev_create(node, device, device.size,
8721 instance.name, force_open, info)
8722 result.Raise("Can't create block device %s on"
8723 " node %s for instance %s" % (device, node, instance.name))
8724 if device.physical_id is None:
8725 device.physical_id = result.payload
8728 def _GenerateUniqueNames(lu, exts):
8729 """Generate a suitable LV name.
8731 This will generate a logical volume name for the given instance.
8733 """
8734 results = []
8735 for val in exts:
8736 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8737 results.append("%s%s" % (new_id, val))
8739 return results
8741 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8742 iv_name, p_minor, s_minor):
8743 """Generate a drbd8 device complete with its children.
8746 assert len(vgnames) == len(names) == 2
8747 port = lu.cfg.AllocatePort()
8748 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8750 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8751 logical_id=(vgnames[0], names[0]),
8752 params={})
8753 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8754 logical_id=(vgnames[1], names[1]),
8755 params={})
8756 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8757 logical_id=(primary, secondary, port,
8758 p_minor, s_minor,
8759 shared_secret),
8760 children=[dev_data, dev_meta],
8761 iv_name=iv_name, params={})
8762 return drbd_dev
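# Sketch of the resulting disk tree (added commentary; all values are
# illustrative only):
#
#   Disk(LD_DRBD8, size=1024,
#        logical_id=(primary, secondary, 11000, 0, 1, "<secret>"),
#        children=[Disk(LD_LV, size=1024, logical_id=(vg, "<uuid>_data")),
#                  Disk(LD_LV, size=DRBD_META_SIZE,
#                       logical_id=(vg, "<uuid>_meta"))],
#        iv_name="disk/0")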
8765 _DISK_TEMPLATE_NAME_PREFIX = {
8766 constants.DT_PLAIN: "",
8767 constants.DT_RBD: ".rbd",
8768 }
8771 _DISK_TEMPLATE_DEVICE_TYPE = {
8772 constants.DT_PLAIN: constants.LD_LV,
8773 constants.DT_FILE: constants.LD_FILE,
8774 constants.DT_SHARED_FILE: constants.LD_FILE,
8775 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8776 constants.DT_RBD: constants.LD_RBD,
8777 }
8780 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8781 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8782 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8783 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8784 """Generate the entire disk layout for a given template type.
8787 #TODO: compute space requirements
8789 vgname = lu.cfg.GetVGName()
8790 disk_count = len(disk_info)
8791 disks = []
8793 if template_name == constants.DT_DISKLESS:
8794 pass
8795 elif template_name == constants.DT_DRBD8:
8796 if len(secondary_nodes) != 1:
8797 raise errors.ProgrammerError("Wrong template configuration")
8798 remote_node = secondary_nodes[0]
8799 minors = lu.cfg.AllocateDRBDMinor(
8800 [primary_node, remote_node] * len(disk_info), instance_name)
8802 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8803 full_disk_params)
8804 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8806 names = []
8807 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8808 for i in range(disk_count)]):
8809 names.append(lv_prefix + "_data")
8810 names.append(lv_prefix + "_meta")
8811 for idx, disk in enumerate(disk_info):
8812 disk_index = idx + base_index
8813 data_vg = disk.get(constants.IDISK_VG, vgname)
8814 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8815 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8816 disk[constants.IDISK_SIZE],
8818 names[idx * 2:idx * 2 + 2],
8819 "disk/%d" % disk_index,
8820 minors[idx * 2], minors[idx * 2 + 1])
8821 disk_dev.mode = disk[constants.IDISK_MODE]
8822 disks.append(disk_dev)
8823 else:
8824 if secondary_nodes:
8825 raise errors.ProgrammerError("Wrong template configuration")
8827 if template_name == constants.DT_FILE:
8828 _req_file_storage()
8829 elif template_name == constants.DT_SHARED_FILE:
8830 _req_shr_file_storage()
8832 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8833 if name_prefix is None:
8834 names = None
8835 else:
8836 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8837 (name_prefix, base_index + i)
8838 for i in range(disk_count)])
8840 if template_name == constants.DT_PLAIN:
8841 def logical_id_fn(idx, _, disk):
8842 vg = disk.get(constants.IDISK_VG, vgname)
8843 return (vg, names[idx])
8844 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8845 logical_id_fn = \
8846 lambda _, disk_index, disk: (file_driver,
8847 "%s/disk%d" % (file_storage_dir,
8848 disk_index))
8849 elif template_name == constants.DT_BLOCK:
8850 logical_id_fn = \
8851 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8852 disk[constants.IDISK_ADOPT])
8853 elif template_name == constants.DT_RBD:
8854 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8856 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8858 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8860 for idx, disk in enumerate(disk_info):
8861 disk_index = idx + base_index
8862 size = disk[constants.IDISK_SIZE]
8863 feedback_fn("* disk %s, size %s" %
8864 (disk_index, utils.FormatUnit(size, "h")))
8865 disks.append(objects.Disk(dev_type=dev_type, size=size,
8866 logical_id=logical_id_fn(idx, disk_index, disk),
8867 iv_name="disk/%d" % disk_index,
8868 mode=disk[constants.IDISK_MODE],
8869 params={}))
8871 return disks
8874 def _GetInstanceInfoText(instance):
8875 """Compute that text that should be added to the disk's metadata.
8878 return "originstname+%s" % instance.name
8881 def _CalcEta(time_taken, written, total_size):
8882 """Calculates the ETA based on size written and total size.
8884 @param time_taken: The time taken so far
8885 @param written: amount written so far
8886 @param total_size: The total size of data to be written
8887 @return: The remaining time in seconds
8890 avg_time = time_taken / float(written)
8891 return (total_size - written) * avg_time
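# Worked example (added, not in the original source): after 30 seconds with
# 1024 MiB written out of 4096 MiB, the average speed is 30/1024 s/MiB and
# the remaining 3072 MiB need about 90 seconds:
#
#   >>> _CalcEta(30.0, 1024, 4096)
#   90.0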
8894 def _WipeDisks(lu, instance):
8895 """Wipes instance disks.
8897 @type lu: L{LogicalUnit}
8898 @param lu: the logical unit on whose behalf we execute
8899 @type instance: L{objects.Instance}
8900 @param instance: the instance whose disks we should create
8901 @return: the success of the wipe
8904 node = instance.primary_node
8906 for device in instance.disks:
8907 lu.cfg.SetDiskID(device, node)
8909 logging.info("Pause sync of instance %s disks", instance.name)
8910 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8911 (instance.disks, instance),
8912 True)
8914 for idx, success in enumerate(result.payload):
8915 if not success:
8916 logging.warn("pause-sync of instance %s for disks %d failed",
8917 instance.name, idx)
8919 try:
8920 for idx, device in enumerate(instance.disks):
8921 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8922 # MAX_WIPE_CHUNK at max
8923 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8924 constants.MIN_WIPE_CHUNK_PERCENT)
8925 # we _must_ make this an int, otherwise rounding errors will
8926 # occur
8927 wipe_chunk_size = int(wipe_chunk_size)
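# Worked example (added commentary; assumes the usual constants of
# MAX_WIPE_CHUNK = 1024 MiB and MIN_WIPE_CHUNK_PERCENT = 10): a 5000 MiB
# disk yields min(1024, 5000 / 100.0 * 10) == 500, so it is wiped in
# 500 MiB chunks, while any disk larger than about 10 GiB is capped at
# 1024 MiB chunks.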
8929 lu.LogInfo("* Wiping disk %d", idx)
8930 logging.info("Wiping disk %d for instance %s, node %s using"
8931 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8936 start_time = time.time()
8938 while offset < size:
8939 wipe_size = min(wipe_chunk_size, size - offset)
8940 logging.debug("Wiping disk %d, offset %s, chunk %s",
8941 idx, offset, wipe_size)
8942 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8943 wipe_size)
8944 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8945 (idx, offset, wipe_size))
8946 offset += wipe_size
8947 now = time.time()
8948 if now - last_output >= 60:
8949 eta = _CalcEta(now - start_time, offset, size)
8950 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8951 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8952 last_output = now
8953 finally:
8954 logging.info("Resume sync of instance %s disks", instance.name)
8956 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8957 (instance.disks, instance),
8958 False)
8960 for idx, success in enumerate(result.payload):
8961 if not success:
8962 lu.LogWarning("Resume sync of disk %d failed, please have a"
8963 " look at the status and troubleshoot the issue", idx)
8964 logging.warn("resume-sync of instance %s for disks %d failed",
8965 instance.name, idx)
8968 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8969 """Create all disks for an instance.
8971 This abstracts away some work from AddInstance.
8973 @type lu: L{LogicalUnit}
8974 @param lu: the logical unit on whose behalf we execute
8975 @type instance: L{objects.Instance}
8976 @param instance: the instance whose disks we should create
8978 @param to_skip: list of indices to skip
8979 @type target_node: string
8980 @param target_node: if passed, overrides the target node for creation
8982 @return: the success of the creation
8985 info = _GetInstanceInfoText(instance)
8986 if target_node is None:
8987 pnode = instance.primary_node
8988 all_nodes = instance.all_nodes
8989 else:
8990 pnode = target_node
8991 all_nodes = [pnode]
8993 if instance.disk_template in constants.DTS_FILEBASED:
8994 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8995 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8997 result.Raise("Failed to create directory '%s' on"
8998 " node %s" % (file_storage_dir, pnode))
9000 # Note: this needs to be kept in sync with adding of disks in
9001 # LUInstanceSetParams
9002 for idx, device in enumerate(instance.disks):
9003 if to_skip and idx in to_skip:
9004 continue
9005 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9007 for node in all_nodes:
9008 f_create = node == pnode
9009 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9012 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9013 """Remove all disks for an instance.
9015 This abstracts away some work from `AddInstance()` and
9016 `RemoveInstance()`. Note that in case some of the devices couldn't
9017 be removed, the removal will continue with the other ones (compare
9018 with `_CreateDisks()`).
9020 @type lu: L{LogicalUnit}
9021 @param lu: the logical unit on whose behalf we execute
9022 @type instance: L{objects.Instance}
9023 @param instance: the instance whose disks we should remove
9024 @type target_node: string
9025 @param target_node: used to override the node on which to remove the disks
9027 @return: the success of the removal
9030 logging.info("Removing block devices for instance %s", instance.name)
9032 all_result = True
9033 ports_to_release = set()
9034 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9035 for (idx, device) in enumerate(anno_disks):
9036 if target_node:
9037 edata = [(target_node, device)]
9038 else:
9039 edata = device.ComputeNodeTree(instance.primary_node)
9040 for node, disk in edata:
9041 lu.cfg.SetDiskID(disk, node)
9042 result = lu.rpc.call_blockdev_remove(node, disk)
9043 if result.fail_msg:
9044 lu.LogWarning("Could not remove disk %s on node %s,"
9045 " continuing anyway: %s", idx, node, result.fail_msg)
9046 if not (result.offline and node != instance.primary_node):
9047 all_result = False
9049 # if this is a DRBD disk, return its port to the pool
9050 if device.dev_type in constants.LDS_DRBD:
9051 ports_to_release.add(device.logical_id[2])
9053 if all_result or ignore_failures:
9054 for port in ports_to_release:
9055 lu.cfg.AddTcpUdpPort(port)
9057 if instance.disk_template == constants.DT_FILE:
9058 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9059 if target_node:
9060 tgt = target_node
9061 else:
9062 tgt = instance.primary_node
9063 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9064 if result.fail_msg:
9065 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9066 file_storage_dir, instance.primary_node, result.fail_msg)
9067 all_result = False
9069 return all_result
9072 def _ComputeDiskSizePerVG(disk_template, disks):
9073 """Compute disk size requirements in the volume group
9076 def _compute(disks, payload):
9077 """Universal algorithm.
9082 vgs[disk[constants.IDISK_VG]] = \
9083 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9087 # Required free disk space as a function of disk and swap space
9088 req_size_dict = {
9089 constants.DT_DISKLESS: {},
9090 constants.DT_PLAIN: _compute(disks, 0),
9091 # 128 MB are added for drbd metadata for each disk
9092 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9093 constants.DT_FILE: {},
9094 constants.DT_SHARED_FILE: {},
9095 }
9097 if disk_template not in req_size_dict:
9098 raise errors.ProgrammerError("Disk template '%s' size requirement"
9099 " is unknown" % disk_template)
9101 return req_size_dict[disk_template]
9104 def _ComputeDiskSize(disk_template, disks):
9105 """Compute disk size requirements in the volume group
9108 # Required free disk space as a function of disk and swap space
9109 req_size_dict = {
9110 constants.DT_DISKLESS: None,
9111 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9112 # 128 MB are added for drbd metadata for each disk
9113 constants.DT_DRBD8:
9114 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9115 constants.DT_FILE: None,
9116 constants.DT_SHARED_FILE: 0,
9117 constants.DT_BLOCK: 0,
9118 constants.DT_RBD: 0,
9119 }
9121 if disk_template not in req_size_dict:
9122 raise errors.ProgrammerError("Disk template '%s' size requirement"
9123 " is unknown" % disk_template)
9125 return req_size_dict[disk_template]
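# Worked example (added, not in the original source): for two disks of
# 1024 MiB and 2048 MiB, DT_PLAIN needs 1024 + 2048 = 3072 MiB, while
# DT_DRBD8 needs (1024 + 128) + (2048 + 128) = 3328 MiB because each disk
# carries a DRBD_META_SIZE (128 MiB) metadata volume.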
9128 def _FilterVmNodes(lu, nodenames):
9129 """Filters out non-vm_capable nodes from a list.
9131 @type lu: L{LogicalUnit}
9132 @param lu: the logical unit for which we check
9133 @type nodenames: list
9134 @param nodenames: the list of nodes on which we should check
9136 @return: the list of vm-capable nodes
9139 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9140 return [name for name in nodenames if name not in vm_nodes]
9143 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9144 """Hypervisor parameter validation.
9146 This function abstracts the hypervisor parameter validation to be
9147 used in both instance create and instance modify.
9149 @type lu: L{LogicalUnit}
9150 @param lu: the logical unit for which we check
9151 @type nodenames: list
9152 @param nodenames: the list of nodes on which we should check
9153 @type hvname: string
9154 @param hvname: the name of the hypervisor we should use
9155 @type hvparams: dict
9156 @param hvparams: the parameters which we need to check
9157 @raise errors.OpPrereqError: if the parameters are not valid
9160 nodenames = _FilterVmNodes(lu, nodenames)
9162 cluster = lu.cfg.GetClusterInfo()
9163 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9165 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9166 for node in nodenames:
9167 info = hvinfo[node]
9168 if info.offline:
9169 continue
9170 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9173 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9174 """OS parameters validation.
9176 @type lu: L{LogicalUnit}
9177 @param lu: the logical unit for which we check
9178 @type required: boolean
9179 @param required: whether the validation should fail if the OS is not
9180 found
9181 @type nodenames: list
9182 @param nodenames: the list of nodes on which we should check
9183 @type osname: string
9184 @param osname: the name of the OS we should use
9185 @type osparams: dict
9186 @param osparams: the parameters which we need to check
9187 @raise errors.OpPrereqError: if the parameters are not valid
9190 nodenames = _FilterVmNodes(lu, nodenames)
9191 result = lu.rpc.call_os_validate(nodenames, required, osname,
9192 [constants.OS_VALIDATE_PARAMETERS],
9193 osparams)
9194 for node, nres in result.items():
9195 # we don't check for offline cases since this should be run only
9196 # against the master node and/or an instance's nodes
9197 nres.Raise("OS Parameters validation failed on node %s" % node)
9198 if not nres.payload:
9199 lu.LogInfo("OS %s not found on node %s, validation skipped",
9200 osname, node)
9203 class LUInstanceCreate(LogicalUnit):
9204 """Create an instance.
9207 HPATH = "instance-add"
9208 HTYPE = constants.HTYPE_INSTANCE
9211 def CheckArguments(self):
9215 # do not require name_check to ease forward/backward compatibility
9216 # for tools
9217 if self.op.no_install and self.op.start:
9218 self.LogInfo("No-installation mode selected, disabling startup")
9219 self.op.start = False
9220 # validate/normalize the instance name
9221 self.op.instance_name = \
9222 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9224 if self.op.ip_check and not self.op.name_check:
9225 # TODO: make the ip check more flexible and not depend on the name check
9226 raise errors.OpPrereqError("Cannot do IP address check without a name"
9227 " check", errors.ECODE_INVAL)
9229 # check nics' parameter names
9230 for nic in self.op.nics:
9231 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9233 # check disks. parameter names and consistent adopt/no-adopt strategy
9234 has_adopt = has_no_adopt = False
9235 for disk in self.op.disks:
9236 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9237 if constants.IDISK_ADOPT in disk:
9238 has_adopt = True
9239 else:
9240 has_no_adopt = True
9241 if has_adopt and has_no_adopt:
9242 raise errors.OpPrereqError("Either all disks are adopted or none is",
9243 errors.ECODE_INVAL)
9244 if has_adopt:
9245 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9246 raise errors.OpPrereqError("Disk adoption is not supported for the"
9247 " '%s' disk template" %
9248 self.op.disk_template,
9250 if self.op.iallocator is not None:
9251 raise errors.OpPrereqError("Disk adoption not allowed with an"
9252 " iallocator script", errors.ECODE_INVAL)
9253 if self.op.mode == constants.INSTANCE_IMPORT:
9254 raise errors.OpPrereqError("Disk adoption not allowed for"
9255 " instance import", errors.ECODE_INVAL)
9257 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9258 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9259 " but no 'adopt' parameter given" %
9260 self.op.disk_template,
9263 self.adopt_disks = has_adopt
9265 # instance name verification
9266 if self.op.name_check:
9267 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9268 self.op.instance_name = self.hostname1.name
9269 # used in CheckPrereq for ip ping check
9270 self.check_ip = self.hostname1.ip
9271 else:
9272 self.check_ip = None
9274 # file storage checks
9275 if (self.op.file_driver and
9276 not self.op.file_driver in constants.FILE_DRIVER):
9277 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9278 self.op.file_driver, errors.ECODE_INVAL)
9280 if self.op.disk_template == constants.DT_FILE:
9281 opcodes.RequireFileStorage()
9282 elif self.op.disk_template == constants.DT_SHARED_FILE:
9283 opcodes.RequireSharedFileStorage()
9285 ### Node/iallocator related checks
9286 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9288 if self.op.pnode is not None:
9289 if self.op.disk_template in constants.DTS_INT_MIRROR:
9290 if self.op.snode is None:
9291 raise errors.OpPrereqError("The networked disk templates need"
9292 " a mirror node", errors.ECODE_INVAL)
9294 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9295 " template")
9296 self.op.snode = None
9298 self._cds = _GetClusterDomainSecret()
9300 if self.op.mode == constants.INSTANCE_IMPORT:
9301 # On import force_variant must be True, because if we forced it at
9302 # initial install, our only chance when importing it back is that it
9303 # works again!
9304 self.op.force_variant = True
9306 if self.op.no_install:
9307 self.LogInfo("No-installation mode has no effect during import")
9309 elif self.op.mode == constants.INSTANCE_CREATE:
9310 if self.op.os_type is None:
9311         raise errors.OpPrereqError("No guest OS specified",
9312                                    errors.ECODE_INVAL)
9313 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9314 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9315                                    " installation" % self.op.os_type,
9316                                    errors.ECODE_INVAL)
9317 if self.op.disk_template is None:
9318         raise errors.OpPrereqError("No disk template specified",
9319                                    errors.ECODE_INVAL)
9321 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9322 # Check handshake to ensure both clusters have the same domain secret
9323 src_handshake = self.op.source_handshake
9324 if not src_handshake:
9325 raise errors.OpPrereqError("Missing source handshake",
9328       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9329                                                            src_handshake)
9330       if errmsg:
9331         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9332                                    errors.ECODE_INVAL)
9334 # Load and check source CA
9335 self.source_x509_ca_pem = self.op.source_x509_ca
9336 if not self.source_x509_ca_pem:
9337         raise errors.OpPrereqError("Missing source X509 CA",
9338                                    errors.ECODE_INVAL)
9339 
9340       try:
9341         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9342                                                     self._cds)
9343       except OpenSSL.crypto.Error, err:
9344 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9345 (err, ), errors.ECODE_INVAL)
9347 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9348 if errcode is not None:
9349         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9350                                    errors.ECODE_INVAL)
9352 self.source_x509_ca = cert
9354 src_instance_name = self.op.source_instance_name
9355 if not src_instance_name:
9356         raise errors.OpPrereqError("Missing source instance name",
9357                                    errors.ECODE_INVAL)
9359 self.source_instance_name = \
9360         netutils.GetHostname(name=src_instance_name).name
9361 
9362     else:
9363       raise errors.OpPrereqError("Invalid instance creation mode %r" %
9364 self.op.mode, errors.ECODE_INVAL)
9366 def ExpandNames(self):
9367     """ExpandNames for CreateInstance.
9368 
9369     Figure out the right locks for instance creation.
9370 
9371     """
9372 self.needed_locks = {}
9374 instance_name = self.op.instance_name
9375 # this is just a preventive check, but someone might still add this
9376 # instance in the meantime, and creation will fail at lock-add time
9377 if instance_name in self.cfg.GetInstanceList():
9378 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9379 instance_name, errors.ECODE_EXISTS)
9381 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9383 if self.op.iallocator:
9384 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9385       # specifying a group on instance creation and then selecting nodes from
9386       # that group
9387 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9388       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9389     else:
9390       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9391 nodelist = [self.op.pnode]
9392 if self.op.snode is not None:
9393 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9394 nodelist.append(self.op.snode)
9395 self.needed_locks[locking.LEVEL_NODE] = nodelist
9396 # Lock resources of instance's primary and secondary nodes (copy to
9397 # prevent accidential modification)
9398 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9400 # in case of import lock the source node too
9401 if self.op.mode == constants.INSTANCE_IMPORT:
9402 src_node = self.op.src_node
9403 src_path = self.op.src_path
9405 if src_path is None:
9406 self.op.src_path = src_path = self.op.instance_name
9408 if src_node is None:
9409 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9410 self.op.src_node = None
9411 if os.path.isabs(src_path):
9412 raise errors.OpPrereqError("Importing an instance from a path"
9413                                      " requires a source node option",
9414                                      errors.ECODE_INVAL)
9415       else:
9416         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9417 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9418 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9419 if not os.path.isabs(src_path):
9420 self.op.src_path = src_path = \
9421 utils.PathJoin(constants.EXPORT_DIR, src_path)
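      # Illustrative example (the concrete path is an assumption; the real
      # value comes from constants.EXPORT_DIR): a relative src_path "inst1"
      # would be resolved to something like "/srv/ganeti/export/inst1",
      # whereas an absolute path is only accepted together with an explicit
      # source node (checked above).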
9423 def _RunAllocator(self):
9424     """Run the allocator based on input opcode.
9425 
9426     """
9427     nics = [n.ToDict() for n in self.nics]
9428 ial = IAllocator(self.cfg, self.rpc,
9429 mode=constants.IALLOCATOR_MODE_ALLOC,
9430 name=self.op.instance_name,
9431                      disk_template=self.op.disk_template,
9432                      tags=self.op.tags,
9433                      os=self.op.os_type,
9434                      vcpus=self.be_full[constants.BE_VCPUS],
9435                      memory=self.be_full[constants.BE_MAXMEM],
9436                      spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9437                      disks=self.disks,
9438                      nics=nics,
9439                      hypervisor=self.op.hypervisor,
9440                      )
9442     ial.Run(self.op.iallocator)
9443 
9444     if not ial.success:
9445       raise errors.OpPrereqError("Can't compute nodes using"
9446 " iallocator '%s': %s" %
9447                                  (self.op.iallocator, ial.info),
9448                                  errors.ECODE_NORES)
9449 if len(ial.result) != ial.required_nodes:
9450 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9451 " of nodes (%s), required %s" %
9452 (self.op.iallocator, len(ial.result),
9453 ial.required_nodes), errors.ECODE_FAULT)
9454 self.op.pnode = ial.result[0]
9455 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9456 self.op.instance_name, self.op.iallocator,
9457 utils.CommaJoin(ial.result))
9458 if ial.required_nodes == 2:
9459 self.op.snode = ial.result[1]
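    # Illustrative example (hostnames are made up): for a DRBD template
    # ial.required_nodes is 2, so a result like
    #   ["node1.example.com", "node2.example.com"]
    # makes node1 the primary and node2 the secondary.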
9461   def BuildHooksEnv(self):
9462     """Build hooks env.
9463 
9464     This runs on master, primary and secondary nodes of the instance.
9465 
9466     """
9467     env = {
9468       "ADD_MODE": self.op.mode,
9469       }
9470 if self.op.mode == constants.INSTANCE_IMPORT:
9471 env["SRC_NODE"] = self.op.src_node
9472 env["SRC_PATH"] = self.op.src_path
9473 env["SRC_IMAGES"] = self.src_images
9475 env.update(_BuildInstanceHookEnv(
9476 name=self.op.instance_name,
9477 primary_node=self.op.pnode,
9478 secondary_nodes=self.secondaries,
9479 status=self.op.start,
9480 os_type=self.op.os_type,
9481 minmem=self.be_full[constants.BE_MINMEM],
9482 maxmem=self.be_full[constants.BE_MAXMEM],
9483 vcpus=self.be_full[constants.BE_VCPUS],
9484 nics=_NICListToTuple(self, self.nics),
9485 disk_template=self.op.disk_template,
9486 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9487 for d in self.disks],
9488       bep=self.be_full,
9489       hvp=self.hv_full,
9490       hypervisor_name=self.op.hypervisor,
9491       tags=self.op.tags,
9492     ))
9493 
9494     return env
9496 def BuildHooksNodes(self):
9497     """Build hooks nodes.
9498 
9499     """
9500     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9501     return nl, nl
9503 def _ReadExportInfo(self):
9504 """Reads the export information from disk.
9506 It will override the opcode source node and path with the actual
9507 information, if these two were not specified before.
9509     @return: the export information
9510 
9511     """
9512 assert self.op.mode == constants.INSTANCE_IMPORT
9514 src_node = self.op.src_node
9515 src_path = self.op.src_path
9517 if src_node is None:
9518 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9519 exp_list = self.rpc.call_export_list(locked_nodes)
9520       found = False
9521       for node in exp_list:
9522         if exp_list[node].fail_msg:
9523           continue
9524         if src_path in exp_list[node].payload:
9525           found = True
9526           self.op.src_node = src_node = node
9527           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9528                                                        src_path)
9529           break
9530       if not found:
9531         raise errors.OpPrereqError("No export found for relative path %s" %
9532                                    src_path, errors.ECODE_INVAL)
9534 _CheckNodeOnline(self, src_node)
9535 result = self.rpc.call_export_info(src_node, src_path)
9536 result.Raise("No export or invalid export found in dir %s" % src_path)
9538 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9539 if not export_info.has_section(constants.INISECT_EXP):
9540 raise errors.ProgrammerError("Corrupted export config",
9541 errors.ECODE_ENVIRON)
9543 ei_version = export_info.get(constants.INISECT_EXP, "version")
9544 if (int(ei_version) != constants.EXPORT_VERSION):
9545 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9546 (ei_version, constants.EXPORT_VERSION),
9547                                    errors.ECODE_ENVIRON)
9548 
9549     return export_info
9550 def _ReadExportParams(self, einfo):
9551 """Use export parameters as defaults.
9553 In case the opcode doesn't specify (as in override) some instance
9554     parameters, then try to use them from the export information, if
9555     that declares them.
9556 
9557     """
9558     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9560 if self.op.disk_template is None:
9561 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9562         self.op.disk_template = einfo.get(constants.INISECT_INS,
9563                                           "disk_template")
9564 if self.op.disk_template not in constants.DISK_TEMPLATES:
9565 raise errors.OpPrereqError("Disk template specified in configuration"
9566 " file is not one of the allowed values:"
9567                                      " %s" % " ".join(constants.DISK_TEMPLATES))
9568       else:
9569         raise errors.OpPrereqError("No disk template specified and the export"
9570                                    " is missing the disk_template information",
9571                                    errors.ECODE_INVAL)
9573     if not self.op.disks:
9574       disks = []
9575       # TODO: import the disk iv_name too
9576 for idx in range(constants.MAX_DISKS):
9577 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9578 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9579 disks.append({constants.IDISK_SIZE: disk_sz})
9580 self.op.disks = disks
9581 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9582 raise errors.OpPrereqError("No disk info specified and the export"
9583                                    " is missing the disk information",
9584                                    errors.ECODE_INVAL)
9586     if not self.op.nics:
9587       nics = []
9588       for idx in range(constants.MAX_NICS):
9589         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9590           ndict = {}
9591           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9592             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9593             ndict[name] = v
9594           nics.append(ndict)
9595         else:
9596           break
9597       self.op.nics = nics
9599 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9600 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9602 if (self.op.hypervisor is None and
9603 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9604 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9606 if einfo.has_section(constants.INISECT_HYP):
9607 # use the export parameters but do not override the ones
9608 # specified by the user
9609 for name, value in einfo.items(constants.INISECT_HYP):
9610 if name not in self.op.hvparams:
9611 self.op.hvparams[name] = value
9613 if einfo.has_section(constants.INISECT_BEP):
9614 # use the parameters, without overriding
9615 for name, value in einfo.items(constants.INISECT_BEP):
9616 if name not in self.op.beparams:
9617 self.op.beparams[name] = value
9618 # Compatibility for the old "memory" be param
9619 if name == constants.BE_MEMORY:
9620 if constants.BE_MAXMEM not in self.op.beparams:
9621 self.op.beparams[constants.BE_MAXMEM] = value
9622 if constants.BE_MINMEM not in self.op.beparams:
9623 self.op.beparams[constants.BE_MINMEM] = value
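        # Illustrative example (values assumed): an old export carrying
        # "memory = 128" would, absent explicit opcode values, set both
        # BE_MAXMEM and BE_MINMEM to 128 here.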
9625 # try to read the parameters old style, from the main section
9626 for name in constants.BES_PARAMETERS:
9627 if (name not in self.op.beparams and
9628 einfo.has_option(constants.INISECT_INS, name)):
9629 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9631 if einfo.has_section(constants.INISECT_OSP):
9632 # use the parameters, without overriding
9633 for name, value in einfo.items(constants.INISECT_OSP):
9634 if name not in self.op.osparams:
9635 self.op.osparams[name] = value
9637 def _RevertToDefaults(self, cluster):
9638     """Revert the instance parameters to the default values.
9639 
9640     """
9642 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9643 for name in self.op.hvparams.keys():
9644 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9645 del self.op.hvparams[name]
9647 be_defs = cluster.SimpleFillBE({})
9648 for name in self.op.beparams.keys():
9649 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9650 del self.op.beparams[name]
9652 nic_defs = cluster.SimpleFillNIC({})
9653 for nic in self.op.nics:
9654 for name in constants.NICS_PARAMETERS:
9655         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9656           del nic[name]
9658 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9659 for name in self.op.osparams.keys():
9660 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9661 del self.op.osparams[name]
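    # Editor's sketch of the intent (parameter/value purely assumed): if the
    # cluster default for some hv parameter is "kernel_path=/boot/vmlinuz"
    # and the opcode requested exactly that, the explicit entry is dropped
    # above so the instance keeps following the cluster default instead of
    # pinning its own copy.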
9663 def _CalculateFileStorageDir(self):
9664     """Calculate final instance file storage dir.
9665 
9666     """
9667 # file storage dir calculation/check
9668 self.instance_file_storage_dir = None
9669 if self.op.disk_template in constants.DTS_FILEBASED:
9670       # build the full file storage dir path
9671       joinargs = []
9672 
9673       if self.op.disk_template == constants.DT_SHARED_FILE:
9674         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9675       else:
9676         get_fsd_fn = self.cfg.GetFileStorageDir
9678 cfg_storagedir = get_fsd_fn()
9679 if not cfg_storagedir:
9680 raise errors.OpPrereqError("Cluster file storage dir not defined")
9681 joinargs.append(cfg_storagedir)
9683 if self.op.file_storage_dir is not None:
9684 joinargs.append(self.op.file_storage_dir)
9686 joinargs.append(self.op.instance_name)
9688 # pylint: disable=W0142
9689 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
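      # Illustrative result (directory names are assumptions): with a
      # cluster dir of "/srv/ganeti/file-storage", an opcode subdir "web"
      # and instance "inst1.example.com", the final dir would be
      #   /srv/ganeti/file-storage/web/inst1.example.com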
9691 def CheckPrereq(self): # pylint: disable=R0914
9692     """Check prerequisites.
9693 
9694     """
9695 self._CalculateFileStorageDir()
9697 if self.op.mode == constants.INSTANCE_IMPORT:
9698 export_info = self._ReadExportInfo()
9699 self._ReadExportParams(export_info)
9700       self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9701     else:
9702       self._old_instance_name = None
9704 if (not self.cfg.GetVGName() and
9705 self.op.disk_template not in constants.DTS_NOT_LVM):
9706 raise errors.OpPrereqError("Cluster does not support lvm-based"
9707 " instances", errors.ECODE_STATE)
9709 if (self.op.hypervisor is None or
9710 self.op.hypervisor == constants.VALUE_AUTO):
9711 self.op.hypervisor = self.cfg.GetHypervisorType()
9713 cluster = self.cfg.GetClusterInfo()
9714 enabled_hvs = cluster.enabled_hypervisors
9715 if self.op.hypervisor not in enabled_hvs:
9716 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9717 " cluster (%s)" % (self.op.hypervisor,
9718                                  ",".join(enabled_hvs)),
9719                                  errors.ECODE_STATE)
9721 # Check tag validity
9722 for tag in self.op.tags:
9723 objects.TaggableObject.ValidateTag(tag)
9725 # check hypervisor parameter syntax (locally)
9726 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9727     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9728                                       self.op.hvparams)
9729 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9730 hv_type.CheckParameterSyntax(filled_hvp)
9731 self.hv_full = filled_hvp
9732 # check that we don't specify global parameters on an instance
9733 _CheckGlobalHvParams(self.op.hvparams)
9735 # fill and remember the beparams dict
9736 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9737 for param, value in self.op.beparams.iteritems():
9738 if value == constants.VALUE_AUTO:
9739 self.op.beparams[param] = default_beparams[param]
9740 objects.UpgradeBeParams(self.op.beparams)
9741 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9742 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9744 # build os parameters
9745 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9747     # now that hvp/bep are in final format, let's reset to defaults,
9748     # if requested
9749     if self.op.identify_defaults:
9750       self._RevertToDefaults(cluster)
9751 
9752     # NIC buildup
9753     self.nics = []
9754     for idx, nic in enumerate(self.op.nics):
9755 nic_mode_req = nic.get(constants.INIC_MODE, None)
9756 nic_mode = nic_mode_req
9757 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9758 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9760 # in routed mode, for the first nic, the default ip is 'auto'
9761 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9762 default_ip_mode = constants.VALUE_AUTO
9764 default_ip_mode = constants.VALUE_NONE
9766 # ip validity checks
9767 ip = nic.get(constants.INIC_IP, default_ip_mode)
9768       if ip is None or ip.lower() == constants.VALUE_NONE:
9769         nic_ip = None
9770       elif ip.lower() == constants.VALUE_AUTO:
9771         if not self.op.name_check:
9772           raise errors.OpPrereqError("IP address set to auto but name checks"
9773                                      " have been skipped",
9774                                      errors.ECODE_INVAL)
9775         nic_ip = self.hostname1.ip
9776       else:
9777         if not netutils.IPAddress.IsValid(ip):
9778           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9779                                      errors.ECODE_INVAL)
9780         nic_ip = ip
9782 # TODO: check the ip address for uniqueness
9783 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9784         raise errors.OpPrereqError("Routed nic mode requires an ip address",
9785                                    errors.ECODE_INVAL)
9787 # MAC address verification
9788 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9789 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9790         mac = utils.NormalizeAndValidateMac(mac)
9791 
9792         try:
9793           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9794 except errors.ReservationError:
9795 raise errors.OpPrereqError("MAC address %s already in use"
9796 " in cluster" % mac,
9797 errors.ECODE_NOTUNIQUE)
9799 # Build nic parameters
9800 link = nic.get(constants.INIC_LINK, None)
9801 if link == constants.VALUE_AUTO:
9802           link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9803         nicparams = {}
9804         if nic_mode_req:
9805           nicparams[constants.NIC_MODE] = nic_mode
9806         if link:
9807           nicparams[constants.NIC_LINK] = link
9809 check_params = cluster.SimpleFillNIC(nicparams)
9810 objects.NIC.CheckParameterSyntax(check_params)
9811 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9813 # disk checks/pre-build
9814     default_vg = self.cfg.GetVGName()
9815     self.disks = []
9816     for disk in self.op.disks:
9817 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9818 if mode not in constants.DISK_ACCESS_SET:
9819 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9820 mode, errors.ECODE_INVAL)
9821       size = disk.get(constants.IDISK_SIZE, None)
9822       if size is None:
9823         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9824       try:
9825         size = int(size)
9826       except (TypeError, ValueError):
9827         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9828                                    errors.ECODE_INVAL)
9830       data_vg = disk.get(constants.IDISK_VG, default_vg)
9831       new_disk = {
9832         constants.IDISK_SIZE: size,
9833         constants.IDISK_MODE: mode,
9834         constants.IDISK_VG: data_vg,
9835         }
9836 if constants.IDISK_METAVG in disk:
9837 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9838 if constants.IDISK_ADOPT in disk:
9839 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9840 self.disks.append(new_disk)
9842     if self.op.mode == constants.INSTANCE_IMPORT:
9843       disk_images = []
9844       for idx in range(len(self.disks)):
9845 option = "disk%d_dump" % idx
9846 if export_info.has_option(constants.INISECT_INS, option):
9847 # FIXME: are the old os-es, disk sizes, etc. useful?
9848 export_name = export_info.get(constants.INISECT_INS, option)
9849 image = utils.PathJoin(self.op.src_path, export_name)
9850           disk_images.append(image)
9851         else:
9852           disk_images.append(False)
9854 self.src_images = disk_images
9856 if self.op.instance_name == self._old_instance_name:
9857 for idx, nic in enumerate(self.nics):
9858 if nic.mac == constants.VALUE_AUTO:
9859 nic_mac_ini = "nic%d_mac" % idx
9860 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9862 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9864 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9865 if self.op.ip_check:
9866 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9867 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9868 (self.check_ip, self.op.instance_name),
9869 errors.ECODE_NOTUNIQUE)
9871 #### mac address generation
9872 # By generating here the mac address both the allocator and the hooks get
9873 # the real final mac address rather than the 'auto' or 'generate' value.
9874 # There is a race condition between the generation and the instance object
9875 # creation, which means that we know the mac is valid now, but we're not
9876 # sure it will be when we actually add the instance. If things go bad
9877 # adding the instance will abort because of a duplicate mac, and the
9878 # creation job will fail.
9879 for nic in self.nics:
9880 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9881 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
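    # (sketch: a generated MAC carries the cluster's MAC prefix, e.g.
    # "aa:00:00:dd:ac:9f" under the default "aa:00:00" prefix -- example
    # value only, the real prefix is cluster configuration)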
9885 if self.op.iallocator is not None:
9886 self._RunAllocator()
9888 # Release all unneeded node locks
9889 _ReleaseLocks(self, locking.LEVEL_NODE,
9890                   keep=filter(None, [self.op.pnode, self.op.snode,
9891                                      self.op.src_node]))
9892 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9893                   keep=filter(None, [self.op.pnode, self.op.snode,
9894                                      self.op.src_node]))
9896 #### node related checks
9898 # check primary node
9899 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9900 assert self.pnode is not None, \
9901       "Cannot retrieve locked node %s" % self.op.pnode
9902     if pnode.offline:
9903       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9904                                  pnode.name, errors.ECODE_STATE)
9905     if pnode.drained:
9906       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9907 pnode.name, errors.ECODE_STATE)
9908 if not pnode.vm_capable:
9909 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9910 " '%s'" % pnode.name, errors.ECODE_STATE)
9912 self.secondaries = []
9914 # mirror node verification
9915 if self.op.disk_template in constants.DTS_INT_MIRROR:
9916 if self.op.snode == pnode.name:
9917 raise errors.OpPrereqError("The secondary node cannot be the"
9918 " primary node", errors.ECODE_INVAL)
9919 _CheckNodeOnline(self, self.op.snode)
9920 _CheckNodeNotDrained(self, self.op.snode)
9921 _CheckNodeVmCapable(self, self.op.snode)
9922 self.secondaries.append(self.op.snode)
9924 snode = self.cfg.GetNodeInfo(self.op.snode)
9925 if pnode.group != snode.group:
9926 self.LogWarning("The primary and secondary nodes are in two"
9927 " different node groups; the disk parameters"
9928                         " from the first disk's node group will be"
9929                         " used")
9930 
9931     nodenames = [pnode.name] + self.secondaries
9933 # Verify instance specs
9934     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9935     ispec = {
9936       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9937       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9938       constants.ISPEC_DISK_COUNT: len(self.disks),
9939       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9940       constants.ISPEC_NIC_COUNT: len(self.nics),
9941       constants.ISPEC_SPINDLE_USE: spindle_use,
9942       }
9944 group_info = self.cfg.GetNodeGroup(pnode.group)
9945 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9946 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9947 if not self.op.ignore_ipolicy and res:
9948 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9949 " policy: %s") % (pnode.group,
9950                                                    utils.CommaJoin(res)),
9951                                 errors.ECODE_INVAL)
9953 if not self.adopt_disks:
9954 if self.op.disk_template == constants.DT_RBD:
9955 # _CheckRADOSFreeSpace() is just a placeholder.
9956 # Any function that checks prerequisites can be placed here.
9957 # Check if there is enough space on the RADOS cluster.
9958 _CheckRADOSFreeSpace()
9960 # Check lv size requirements, if not adopting
9961 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9962 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9964 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9965 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9966 disk[constants.IDISK_ADOPT])
9967 for disk in self.disks])
9968 if len(all_lvs) != len(self.disks):
9969         raise errors.OpPrereqError("Duplicate volume names given for adoption",
9970                                    errors.ECODE_INVAL)
9971       for lv_name in all_lvs:
9972         try:
9973           # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9974           # to ReserveLV use the same syntax
9975           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9976 except errors.ReservationError:
9977 raise errors.OpPrereqError("LV named %s used by another instance" %
9978 lv_name, errors.ECODE_NOTUNIQUE)
9980 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9981 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9983 node_lvs = self.rpc.call_lv_list([pnode.name],
9984 vg_names.payload.keys())[pnode.name]
9985 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9986 node_lvs = node_lvs.payload
9988       delta = all_lvs.difference(node_lvs.keys())
9989       if delta:
9990         raise errors.OpPrereqError("Missing logical volume(s): %s" %
9991                                    utils.CommaJoin(delta),
9992                                    errors.ECODE_INVAL)
9993       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9994       if online_lvs:
9995         raise errors.OpPrereqError("Online logical volumes found, cannot"
9996                                    " adopt: %s" % utils.CommaJoin(online_lvs),
9997                                    errors.ECODE_STATE)
9998 # update the size of disk based on what is found
9999 for dsk in self.disks:
10000 dsk[constants.IDISK_SIZE] = \
10001 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10002 dsk[constants.IDISK_ADOPT])][0]))
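      # Illustrative example (numbers assumed): if the lv_list payload
      # reports "xenvg/vol1" as 10240.00 MiB, the adopted disk's size
      # field becomes the integer 10240.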
10004 elif self.op.disk_template == constants.DT_BLOCK:
10005 # Normalize and de-duplicate device paths
10006 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10007 for disk in self.disks])
10008 if len(all_disks) != len(self.disks):
10009 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10010 errors.ECODE_INVAL)
10011 baddisks = [d for d in all_disks
10012                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10013       if baddisks:
10014         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10015 " cannot be adopted" %
10016 (", ".join(baddisks),
10017 constants.ADOPTABLE_BLOCKDEV_ROOT),
10018 errors.ECODE_INVAL)
10020 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10021 list(all_disks))[pnode.name]
10022       node_disks.Raise("Cannot get block device information from node %s" %
10023                        pnode.name)
10024       node_disks = node_disks.payload
10025       delta = all_disks.difference(node_disks.keys())
10026       if delta:
10027         raise errors.OpPrereqError("Missing block device(s): %s" %
10028 utils.CommaJoin(delta),
10029 errors.ECODE_INVAL)
10030 for dsk in self.disks:
10031 dsk[constants.IDISK_SIZE] = \
10032 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10034 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10036 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10037 # check OS parameters (remotely)
10038 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10040 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10042 # memory check on primary node
10043     # TODO(dynmem): use MINMEM for checking
10044     if self.op.start:
10045       _CheckNodeFreeMemory(self, self.pnode.name,
10046 "creating instance %s" % self.op.instance_name,
10047 self.be_full[constants.BE_MAXMEM],
10048 self.op.hypervisor)
10050 self.dry_run_result = list(nodenames)
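    # (note: in dry-run mode processing stops after CheckPrereq and this
    # node list -- e.g. ["node1.example.com", "node2.example.com"], names
    # illustrative -- is what the caller gets back, cf. dry_run_result)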
10052 def Exec(self, feedback_fn):
10053     """Create and add the instance to the cluster.
10054 
10055     """
10056 instance = self.op.instance_name
10057 pnode_name = self.pnode.name
10059 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10060 self.owned_locks(locking.LEVEL_NODE)), \
10061 "Node locks differ from node resource locks"
10063 ht_kind = self.op.hypervisor
10064 if ht_kind in constants.HTS_REQ_PORT:
10065       network_port = self.cfg.AllocatePort()
10066     else:
10067       network_port = None
10069 # This is ugly but we got a chicken-egg problem here
10070 # We can only take the group disk parameters, as the instance
10071 # has no disks yet (we are generating them right here).
10072 node = self.cfg.GetNodeInfo(pnode_name)
10073 nodegroup = self.cfg.GetNodeGroup(node.group)
10074 disks = _GenerateDiskTemplate(self,
10075 self.op.disk_template,
10076                                   instance, pnode_name,
10077                                   self.secondaries,
10078                                   self.disks,
10079                                   self.instance_file_storage_dir,
10080                                   self.op.file_driver,
10081                                   0,
10082                                   feedback_fn,
10083                                   self.cfg.GetGroupDiskParams(nodegroup))
10085 iobj = objects.Instance(name=instance, os=self.op.os_type,
10086 primary_node=pnode_name,
10087 nics=self.nics, disks=disks,
10088 disk_template=self.op.disk_template,
10089 admin_state=constants.ADMINST_DOWN,
10090 network_port=network_port,
10091 beparams=self.op.beparams,
10092 hvparams=self.op.hvparams,
10093 hypervisor=self.op.hypervisor,
10094                             osparams=self.op.osparams,
10095                             )
10096 
10097     if self.op.tags:
10098       for tag in self.op.tags:
10099         iobj.AddTag(tag)
10101 if self.adopt_disks:
10102 if self.op.disk_template == constants.DT_PLAIN:
10103 # rename LVs to the newly-generated names; we need to construct
10104 # 'fake' LV disks with the old data, plus the new unique_id
10105         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10106         rename_to = []
10107         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10108 rename_to.append(t_dsk.logical_id)
10109 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10110 self.cfg.SetDiskID(t_dsk, pnode_name)
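        # (what the loop above achieves: each temporary Disk now points at
        # its adopted LV, e.g. ("xenvg", "existing-lv"), while rename_to
        # keeps the generated names, e.g. ("xenvg", "<uuid>.disk0_data");
        # the rename RPC below then moves every adopted LV onto its
        # generated name -- example values are assumptions)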
10111 result = self.rpc.call_blockdev_rename(pnode_name,
10112 zip(tmp_disks, rename_to))
10113         result.Raise("Failed to rename adopted LVs")
10114     else:
10115       feedback_fn("* creating instance disks...")
10116       try:
10117         _CreateDisks(self, iobj)
10118       except errors.OpExecError:
10119         self.LogWarning("Device creation failed, reverting...")
10120         try:
10121           _RemoveDisks(self, iobj)
10122         finally:
10123           self.cfg.ReleaseDRBDMinors(instance)
10124           raise
10126 feedback_fn("adding instance %s to cluster config" % instance)
10128 self.cfg.AddInstance(iobj, self.proc.GetECId())
10130 # Declare that we don't want to remove the instance lock anymore, as we've
10131 # added the instance to the config
10132 del self.remove_locks[locking.LEVEL_INSTANCE]
10134 if self.op.mode == constants.INSTANCE_IMPORT:
10135 # Release unused nodes
10136 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10138 # Release all nodes
10139 _ReleaseLocks(self, locking.LEVEL_NODE)
10141     disk_abort = False
10142     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10143       feedback_fn("* wiping instance disks...")
10144       try:
10145         _WipeDisks(self, iobj)
10146       except errors.OpExecError, err:
10147         logging.exception("Wiping disks failed")
10148         self.LogWarning("Wiping instance disks failed (%s)", err)
10149         disk_abort = True
10150 
10151     if disk_abort:
10152       # Something is already wrong with the disks, don't do anything else
10153       pass
10154     elif self.op.wait_for_sync:
10155 disk_abort = not _WaitForSync(self, iobj)
10156 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10157 # make sure the disks are not degraded (still sync-ing is ok)
10158 feedback_fn("* checking mirrors status")
10159       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10160     else:
10161       disk_abort = False
10162 
10163     if disk_abort:
10164       _RemoveDisks(self, iobj)
10165 self.cfg.RemoveInstance(iobj.name)
10166 # Make sure the instance lock gets removed
10167 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10168       raise errors.OpExecError("There are some degraded disks for"
10169                                " this instance")
10171 # Release all node resource locks
10172 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10174 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10175 # we need to set the disks ID to the primary node, since the
10176 # preceding code might or might have not done it, depending on
10177 # disk template and other options
10178 for disk in iobj.disks:
10179 self.cfg.SetDiskID(disk, pnode_name)
10180 if self.op.mode == constants.INSTANCE_CREATE:
10181 if not self.op.no_install:
10182 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10183                       not self.op.wait_for_sync)
10184         if pause_sync:
10185           feedback_fn("* pausing disk sync to install instance OS")
10186           result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10187                                                             (iobj.disks,
10188                                                              iobj), True)
10189           for idx, success in enumerate(result.payload):
10190             if not success:
10191               logging.warn("pause-sync of instance %s for disk %d failed",
10192                            instance, idx)
10194 feedback_fn("* running the instance OS create scripts...")
10195         # FIXME: pass debug option from opcode to backend
10196         os_add_result = \
10197           self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10198                                         self.op.debug_level)
10199         if pause_sync:
10200           feedback_fn("* resuming disk sync")
10201           result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10202                                                             (iobj.disks,
10203                                                              iobj), False)
10204           for idx, success in enumerate(result.payload):
10205             if not success:
10206               logging.warn("resume-sync of instance %s for disk %d failed",
10207                            instance, idx)
10208 
10209         os_add_result.Raise("Could not add os for instance %s"
10210                             " on node %s" % (instance, pnode_name))
10211 
10212       else:
10213         if self.op.mode == constants.INSTANCE_IMPORT:
10214           feedback_fn("* running the instance OS import scripts...")
10215 
10216           transfers = []
10217 
10218           for idx, image in enumerate(self.src_images):
10219             if not image:
10220               continue
10222 # FIXME: pass debug option from opcode to backend
10223 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10224 constants.IEIO_FILE, (image, ),
10225 constants.IEIO_SCRIPT,
10226 (iobj.disks[idx], idx),
10228             transfers.append(dt)
10229 
10230           import_result = \
10231             masterd.instance.TransferInstanceData(self, feedback_fn,
10232                                                   self.op.src_node, pnode_name,
10233                                                   self.pnode.secondary_ip,
10234                                                   iobj, transfers)
10235 if not compat.all(import_result):
10236 self.LogWarning("Some disks for instance %s on node %s were not"
10237 " imported successfully" % (instance, pnode_name))
10239 rename_from = self._old_instance_name
10241 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10242 feedback_fn("* preparing remote import...")
10243 # The source cluster will stop the instance before attempting to make
10244 # a connection. In some cases stopping an instance can take a long
10245           # time, hence the shutdown timeout is added to the connection
10246           # timeout.
10247           connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10248 self.op.source_shutdown_timeout)
10249 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
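          # Worked example (numbers assumed): with RIE_CONNECT_TIMEOUT at
          # 60s and source_shutdown_timeout at 120s, the remote end is
          # given 180s to establish the import connection.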
10251           assert iobj.primary_node == self.pnode.name
10252           disk_results = \
10253             masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10254                                           self.source_x509_ca,
10255                                           self._cds, timeouts)
10256 if not compat.all(disk_results):
10257 # TODO: Should the instance still be started, even if some disks
10258 # failed to import (valid for local imports, too)?
10259 self.LogWarning("Some disks for instance %s on node %s were not"
10260 " imported successfully" % (instance, pnode_name))
10262           rename_from = self.source_instance_name
10263 
10264         else:
10265           # also checked in the prereq part
10266           raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10267                                        % self.op.mode)
10269 # Run rename script on newly imported instance
10270 assert iobj.name == instance
10271 feedback_fn("Running rename script for %s" % instance)
10272         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10273                                                    rename_from,
10274                                                    self.op.debug_level)
10275 if result.fail_msg:
10276 self.LogWarning("Failed to run rename script for %s on node"
10277 " %s: %s" % (instance, pnode_name, result.fail_msg))
10279     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10280 
10281     if self.op.start:
10282       iobj.admin_state = constants.ADMINST_UP
10283 self.cfg.Update(iobj, feedback_fn)
10284 logging.info("Starting instance %s on node %s", instance, pnode_name)
10285 feedback_fn("* starting instance...")
10286       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10287                                             False)
10288       result.Raise("Could not start instance")
10290 return list(iobj.all_nodes)
10293 def _CheckRADOSFreeSpace():
10294   """Compute disk size requirements inside the RADOS cluster.
10295 
10296   """
10297   # For the RADOS cluster we assume there is always enough space.
10298   pass
10301 class LUInstanceConsole(NoHooksLU):
10302 """Connect to an instance's console.
10304 This is somewhat special in that it returns the command line that
10305   you need to run on the master node in order to connect to the
10306   console.
10307 
10308   """
10309   REQ_BGL = False
10311 def ExpandNames(self):
10312 self.share_locks = _ShareAll()
10313 self._ExpandAndLockInstance()
10315 def CheckPrereq(self):
10316 """Check prerequisites.
10318     This checks that the instance is in the cluster.
10319 
10320     """
10321 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10322 assert self.instance is not None, \
10323 "Cannot retrieve locked instance %s" % self.op.instance_name
10324 _CheckNodeOnline(self, self.instance.primary_node)
10326 def Exec(self, feedback_fn):
10327     """Connect to the console of an instance.
10328 
10329     """
10330     instance = self.instance
10331 node = instance.primary_node
10333 node_insts = self.rpc.call_instance_list([node],
10334 [instance.hypervisor])[node]
10335 node_insts.Raise("Can't get node information from %s" % node)
10337 if instance.name not in node_insts.payload:
10338 if instance.admin_state == constants.ADMINST_UP:
10339 state = constants.INSTST_ERRORDOWN
10340 elif instance.admin_state == constants.ADMINST_DOWN:
10341       state = constants.INSTST_ADMINDOWN
10342     else:
10343       state = constants.INSTST_ADMINOFFLINE
10344 raise errors.OpExecError("Instance %s is not running (state %s)" %
10345 (instance.name, state))
10347 logging.debug("Connecting to console of %s on %s", instance.name, node)
10349 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10352 def _GetInstanceConsole(cluster, instance):
10353 """Returns console information for an instance.
10355 @type cluster: L{objects.Cluster}
10356   @type instance: L{objects.Instance}
10357 
10358   """
10359 
10360   hyper = hypervisor.GetHypervisor(instance.hypervisor)
10361 # beparams and hvparams are passed separately, to avoid editing the
10362 # instance and then saving the defaults in the instance itself.
10363 hvparams = cluster.FillHV(instance)
10364 beparams = cluster.FillBE(instance)
10365 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10367 assert console.instance == instance.name
10368 assert console.Validate()
10370 return console.ToDict()
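  # Editor's note: the dict returned above is the serialized form of an
  # objects.InstanceConsole; its exact fields depend on the console kind
  # (e.g. an SSH-based console carries the command to run, a VNC one the
  # display coordinates). These examples are indicative only; see
  # objects.InstanceConsole for the authoritative schema.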
10373 class LUInstanceReplaceDisks(LogicalUnit):
10374   """Replace the disks of an instance.
10375 
10376   """
10377   HPATH = "mirrors-replace"
10378   HTYPE = constants.HTYPE_INSTANCE
10379   REQ_BGL = False
10380 
10381   def CheckArguments(self):
10382 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10383 self.op.iallocator)
10385 def ExpandNames(self):
10386 self._ExpandAndLockInstance()
10388 assert locking.LEVEL_NODE not in self.needed_locks
10389 assert locking.LEVEL_NODE_RES not in self.needed_locks
10390 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10392 assert self.op.iallocator is None or self.op.remote_node is None, \
10393 "Conflicting options"
10395 if self.op.remote_node is not None:
10396 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10398 # Warning: do not remove the locking of the new secondary here
10399 # unless DRBD8.AddChildren is changed to work in parallel;
10400 # currently it doesn't since parallel invocations of
10401 # FindUnusedMinor will conflict
10402 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10403       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10404     else:
10405       self.needed_locks[locking.LEVEL_NODE] = []
10406 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10408 if self.op.iallocator is not None:
10409 # iallocator will select a new node in the same group
10410 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10412 self.needed_locks[locking.LEVEL_NODE_RES] = []
10414 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10415 self.op.iallocator, self.op.remote_node,
10416 self.op.disks, False, self.op.early_release,
10417 self.op.ignore_ipolicy)
10419 self.tasklets = [self.replacer]
10421 def DeclareLocks(self, level):
10422 if level == locking.LEVEL_NODEGROUP:
10423 assert self.op.remote_node is None
10424 assert self.op.iallocator is not None
10425 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10427 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10428 # Lock all groups used by instance optimistically; this requires going
10429 # via the node before it's locked, requiring verification later on
10430 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10431 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10433 elif level == locking.LEVEL_NODE:
10434 if self.op.iallocator is not None:
10435 assert self.op.remote_node is None
10436 assert not self.needed_locks[locking.LEVEL_NODE]
10438 # Lock member nodes of all locked groups
10439 self.needed_locks[locking.LEVEL_NODE] = [node_name
10440 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10441         for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10442       else:
10443         self._LockInstancesNodes()
10444     elif level == locking.LEVEL_NODE_RES:
10445       # Reuse node locks
10446 self.needed_locks[locking.LEVEL_NODE_RES] = \
10447 self.needed_locks[locking.LEVEL_NODE]
10449 def BuildHooksEnv(self):
10450 """Build hooks env.
10452     This runs on the master, the primary and all the secondaries.
10453 
10454     """
10455     instance = self.replacer.instance
10456     env = {
10457       "MODE": self.op.mode,
10458 "NEW_SECONDARY": self.op.remote_node,
10459       "OLD_SECONDARY": instance.secondary_nodes[0],
10460       }
10461     env.update(_BuildInstanceHookEnvByObject(self, instance))
10462     return env
10464 def BuildHooksNodes(self):
10465     """Build hooks nodes.
10466 
10467     """
10468     instance = self.replacer.instance
10469     nl = [
10470       self.cfg.GetMasterNode(),
10471       instance.primary_node,
10472       ]
10473     if self.op.remote_node is not None:
10474       nl.append(self.op.remote_node)
10475     return nl, nl
10477 def CheckPrereq(self):
10478     """Check prerequisites.
10479 
10480     """
10481     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10482 self.op.iallocator is None)
10484 # Verify if node group locks are still correct
10485 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10487 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10489 return LogicalUnit.CheckPrereq(self)
10492 class TLReplaceDisks(Tasklet):
10493 """Replaces disks for an instance.
10495   Note: Locking is not within the scope of this class.
10496 
10497   """
10498 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10499 disks, delay_iallocator, early_release, ignore_ipolicy):
10500     """Initializes this class.
10501 
10502     """
10503     Tasklet.__init__(self, lu)
10504 
10505     # Parameters
10506     self.instance_name = instance_name
10507     self.mode = mode
10508     self.iallocator_name = iallocator_name
10509     self.remote_node = remote_node
10510     self.disks = disks
10511     self.delay_iallocator = delay_iallocator
10512 self.early_release = early_release
10513     self.ignore_ipolicy = ignore_ipolicy
10514 
10515     # Runtime data
10516     self.instance = None
10517 self.new_node = None
10518 self.target_node = None
10519 self.other_node = None
10520 self.remote_node_info = None
10521     self.node_secondary_ip = None
10522 
10523   @staticmethod
10524   def CheckArguments(mode, remote_node, iallocator):
10525     """Helper function for users of this class.
10526 
10527     """
10528     # check for valid parameter combination
10529 if mode == constants.REPLACE_DISK_CHG:
10530 if remote_node is None and iallocator is None:
10531 raise errors.OpPrereqError("When changing the secondary either an"
10532 " iallocator script must be used or the"
10533 " new node given", errors.ECODE_INVAL)
10535 if remote_node is not None and iallocator is not None:
10536 raise errors.OpPrereqError("Give either the iallocator or the new"
10537 " secondary, not both", errors.ECODE_INVAL)
10539 elif remote_node is not None or iallocator is not None:
10540 # Not replacing the secondary
10541 raise errors.OpPrereqError("The iallocator and new node options can"
10542 " only be used when changing the"
10543                                  " secondary node", errors.ECODE_INVAL)
10544 
10545   @staticmethod
10546   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10547     """Compute a new secondary node using an IAllocator.
10548 
10549     """
10550     ial = IAllocator(lu.cfg, lu.rpc,
10551 mode=constants.IALLOCATOR_MODE_RELOC,
10552 name=instance_name,
10553 relocate_from=list(relocate_from))
10555 ial.Run(iallocator_name)
10557 if not ial.success:
10558 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10559 " %s" % (iallocator_name, ial.info),
10560 errors.ECODE_NORES)
10562 if len(ial.result) != ial.required_nodes:
10563 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10564 " of nodes (%s), required %s" %
10565                                  (iallocator_name,
10566                                   len(ial.result), ial.required_nodes),
10567 errors.ECODE_FAULT)
10569 remote_node_name = ial.result[0]
10571 lu.LogInfo("Selected new secondary for instance '%s': %s",
10572 instance_name, remote_node_name)
10574 return remote_node_name
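    # Illustrative example (hostname assumed): in relocate mode the
    # allocator returns exactly one node, e.g. ["node3.example.com"],
    # which becomes the new secondary.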
10576 def _FindFaultyDisks(self, node_name):
10577     """Wrapper for L{_FindFaultyInstanceDisks}.
10578 
10579     """
10580     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10581                                     node_name, True)
10583 def _CheckDisksActivated(self, instance):
10584 """Checks if the instance disks are activated.
10586 @param instance: The instance to check disks
10587     @return: True if they are activated, False otherwise
10588 
10589     """
10590 nodes = instance.all_nodes
10592     for idx, dev in enumerate(instance.disks):
10593       for node in nodes:
10594         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10595         self.cfg.SetDiskID(dev, node)
10596 
10597         result = _BlockdevFind(self, node, dev, instance)
10598 
10599         if result.offline:
10600           continue
10601         elif result.fail_msg or not result.payload:
10602           return False
10603 
10604     return True
10606 def CheckPrereq(self):
10607 """Check prerequisites.
10609     This checks that the instance is in the cluster.
10610 
10611     """
10612     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10613 assert instance is not None, \
10614 "Cannot retrieve locked instance %s" % self.instance_name
10616 if instance.disk_template != constants.DT_DRBD8:
10617 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10618 " instances", errors.ECODE_INVAL)
10620 if len(instance.secondary_nodes) != 1:
10621 raise errors.OpPrereqError("The instance has a strange layout,"
10622 " expected one secondary but found %d" %
10623 len(instance.secondary_nodes),
10624 errors.ECODE_FAULT)
10626 if not self.delay_iallocator:
10627 self._CheckPrereq2()
10629 def _CheckPrereq2(self):
10630 """Check prerequisites, second part.
10632 This function should always be part of CheckPrereq. It was separated and is
10633 now called from Exec because during node evacuation iallocator was only
10634     called with an unmodified cluster model, not taking planned changes into
10635     account.
10636 
10637     """
10638     instance = self.instance
10639 secondary_node = instance.secondary_nodes[0]
10641 if self.iallocator_name is None:
10642 remote_node = self.remote_node
10643     else:
10644       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10645                                        instance.name, instance.secondary_nodes)
10647 if remote_node is None:
10648       self.remote_node_info = None
10649     else:
10650       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10651 "Remote node '%s' is not locked" % remote_node
10653 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10654 assert self.remote_node_info is not None, \
10655 "Cannot retrieve locked node %s" % remote_node
10657 if remote_node == self.instance.primary_node:
10658 raise errors.OpPrereqError("The specified node is the primary node of"
10659 " the instance", errors.ECODE_INVAL)
10661 if remote_node == secondary_node:
10662 raise errors.OpPrereqError("The specified node is already the"
10663 " secondary node of the instance",
10664 errors.ECODE_INVAL)
10666 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10667 constants.REPLACE_DISK_CHG):
10668 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10669 errors.ECODE_INVAL)
10671 if self.mode == constants.REPLACE_DISK_AUTO:
10672 if not self._CheckDisksActivated(instance):
10673 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10674 " first" % self.instance_name,
10675 errors.ECODE_STATE)
10676 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10677 faulty_secondary = self._FindFaultyDisks(secondary_node)
10679 if faulty_primary and faulty_secondary:
10680 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10681 " one node and can not be repaired"
10682 " automatically" % self.instance_name,
10683                                    errors.ECODE_STATE)
10684 
10685       if faulty_primary:
10686         self.disks = faulty_primary
10687 self.target_node = instance.primary_node
10688 self.other_node = secondary_node
10689 check_nodes = [self.target_node, self.other_node]
10690 elif faulty_secondary:
10691 self.disks = faulty_secondary
10692 self.target_node = secondary_node
10693 self.other_node = instance.primary_node
10694         check_nodes = [self.target_node, self.other_node]
10695       else:
10696         self.disks = []
10697         check_nodes = []
10698 
10699     else:
10700       # Non-automatic modes
10701 if self.mode == constants.REPLACE_DISK_PRI:
10702 self.target_node = instance.primary_node
10703 self.other_node = secondary_node
10704 check_nodes = [self.target_node, self.other_node]
10706 elif self.mode == constants.REPLACE_DISK_SEC:
10707 self.target_node = secondary_node
10708 self.other_node = instance.primary_node
10709 check_nodes = [self.target_node, self.other_node]
10711 elif self.mode == constants.REPLACE_DISK_CHG:
10712 self.new_node = remote_node
10713 self.other_node = instance.primary_node
10714 self.target_node = secondary_node
10715 check_nodes = [self.new_node, self.other_node]
10717 _CheckNodeNotDrained(self.lu, remote_node)
10718 _CheckNodeVmCapable(self.lu, remote_node)
10720 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10721 assert old_node_info is not None
10722 if old_node_info.offline and not self.early_release:
10723 # doesn't make sense to delay the release
10724 self.early_release = True
10725 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10726 " early-release mode", secondary_node)
10727 
10728     else:
10729       raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10730                                    self.mode)
10731 
10732     # If not specified all disks should be replaced
10733     if not self.disks:
10734       self.disks = range(len(self.instance.disks))
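    # (e.g. an instance with three disks and no explicit selection yields
    # self.disks == [0, 1, 2] -- illustrative)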
10736 # TODO: This is ugly, but right now we can't distinguish between internal
10737 # submitted opcode and external one. We should fix that.
10738 if self.remote_node_info:
10739 # We change the node, lets verify it still meets instance policy
10740 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10741       ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10742                                        new_group_info)
10743 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10744 ignore=self.ignore_ipolicy)
10746 for node in check_nodes:
10747 _CheckNodeOnline(self.lu, node)
10749     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10750                                                           self.other_node,
10751                                                           self.target_node]
10752                               if node_name is not None)
10754 # Release unneeded node and node resource locks
10755 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10756 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10758 # Release any owned node group
10759 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10760 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10762 # Check whether disks are valid
10763 for disk_idx in self.disks:
10764 instance.FindDisk(disk_idx)
10766 # Get secondary node IP addresses
10767 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10768 in self.cfg.GetMultiNodeInfo(touched_nodes))
10770 def Exec(self, feedback_fn):
10771 """Execute disk replacement.
10773     This dispatches the disk replacement to the appropriate handler.
10774 
10775     """
10776     if self.delay_iallocator:
10777       self._CheckPrereq2()
10778 
10779     if __debug__:
10780 # Verify owned locks before starting operation
10781 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10782 assert set(owned_nodes) == set(self.node_secondary_ip), \
10783 ("Incorrect node locks, owning %s, expected %s" %
10784 (owned_nodes, self.node_secondary_ip.keys()))
10785 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10786 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10788 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10789 assert list(owned_instances) == [self.instance_name], \
10790 "Instance '%s' not locked" % self.instance_name
10792 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10793         "Should not own any node group lock at this point"
10794 
10795     if not self.disks:
10796       feedback_fn("No disks need replacement")
10797       return
10799 feedback_fn("Replacing disk(s) %s for %s" %
10800 (utils.CommaJoin(self.disks), self.instance.name))
10802 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10804     # Activate the instance disks if we're replacing them on a down instance
10805     if activate_disks:
10806       _StartInstanceDisks(self.lu, self.instance, True)
10807 
10808     try:
10809       # Should we replace the secondary node?
10810       if self.new_node is not None:
10811         fn = self._ExecDrbd8Secondary
10812       else:
10813         fn = self._ExecDrbd8DiskOnly
10814 
10815       result = fn(feedback_fn)
10816     finally:
10817       # Deactivate the instance disks if we're replacing them on a
10818       # down instance
10819       if activate_disks:
10820         _SafeShutdownInstanceDisks(self.lu, self.instance)
10822     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10823 
10824     if __debug__:
10825       # Verify owned locks
10826 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10827 nodes = frozenset(self.node_secondary_ip)
10828 assert ((self.early_release and not owned_nodes) or
10829 (not self.early_release and not (set(owned_nodes) - nodes))), \
10830 ("Not owning the correct locks, early_release=%s, owned=%r,"
10831          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10832 
10833     return result
10835 def _CheckVolumeGroup(self, nodes):
10836 self.lu.LogInfo("Checking volume groups")
10838 vgname = self.cfg.GetVGName()
10840 # Make sure volume group exists on all involved nodes
10841     results = self.rpc.call_vg_list(nodes)
10842     if not results:
10843       raise errors.OpExecError("Can't list volume groups on the nodes")
10844 
10845     for node in nodes:
10846       res = results[node]
10847 res.Raise("Error checking node %s" % node)
10848 if vgname not in res.payload:
10849         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10850                                  (vgname, node))
10852 def _CheckDisksExistence(self, nodes):
10853 # Check disk existence
10854 for idx, dev in enumerate(self.instance.disks):
10855       if idx not in self.disks:
10856         continue
10857 
10858       for node in nodes:
10859         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10860 self.cfg.SetDiskID(dev, node)
10862 result = _BlockdevFind(self, node, dev, self.instance)
10864 msg = result.fail_msg
10865         if msg or not result.payload:
10866           if not msg:
10867             msg = "disk not found"
10868           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10869                                    (idx, node, msg))
10871 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10872 for idx, dev in enumerate(self.instance.disks):
10873       if idx not in self.disks:
10874         continue
10875 
10876       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10877                       (idx, node_name))
10879 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10880 on_primary, ldisk=ldisk):
10881 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10882 " replace disks for instance %s" %
10883 (node_name, self.instance.name))
10885 def _CreateNewStorage(self, node_name):
10886 """Create new storage on the primary or secondary node.
10888 This is only used for same-node replaces, not for changing the
10889     secondary node, hence we don't want to modify the existing disk.
10890 
10891     """
10892     iv_names = {}
10894 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10895 for idx, dev in enumerate(disks):
10896       if idx not in self.disks:
10897         continue
10898 
10899       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10901 self.cfg.SetDiskID(dev, node_name)
10903 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10904 names = _GenerateUniqueNames(self.lu, lv_names)
10906 (data_disk, meta_disk) = dev.children
10907 vg_data = data_disk.logical_id[0]
10908 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10909 logical_id=(vg_data, names[0]),
10910 params=data_disk.params)
10911 vg_meta = meta_disk.logical_id[0]
10912 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10913 logical_id=(vg_meta, names[1]),
10914 params=meta_disk.params)
10916 new_lvs = [lv_data, lv_meta]
10917 old_lvs = [child.Copy() for child in dev.children]
10918 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10920 # we pass force_create=True to force the LVM creation
10921 for new_lv in new_lvs:
10922 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10923                              _GetInstanceInfoText(self.instance), False)
10924 
10925     return iv_names
10927 def _CheckDevices(self, node_name, iv_names):
10928 for name, (dev, _, _) in iv_names.iteritems():
10929 self.cfg.SetDiskID(dev, node_name)
10931 result = _BlockdevFind(self, node_name, dev, self.instance)
10933 msg = result.fail_msg
10934       if msg or not result.payload:
10935         if not msg:
10936           msg = "disk not found"
10937         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10938                                  (name, msg))
10940 if result.payload.is_degraded:
10941 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10943 def _RemoveOldStorage(self, node_name, iv_names):
10944 for name, (_, old_lvs, _) in iv_names.iteritems():
10945       self.lu.LogInfo("Remove logical volumes for %s" % name)
10946 
10947       for lv in old_lvs:
10948         self.cfg.SetDiskID(lv, node_name)
10949 
10950         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10951         if msg:
10952           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10953 hint="remove unused LVs manually")
10955 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10956 """Replace a disk on the primary or secondary for DRBD 8.
10958 The algorithm for replace is quite complicated:
10960 1. for each disk to be replaced:
10962 1. create new LVs on the target node with unique names
10963 1. detach old LVs from the drbd device
10964 1. rename old LVs to name_replaced.<time_t>
10965 1. rename new LVs to old LVs
10966 1. attach the new LVs (with the old names now) to the drbd device
10968 1. wait for sync across all devices
10970 1. for each modified disk:
10972 1. remove old LVs (which have the name name_replaces.<time_t>)
10974     Failures are not very well handled.
10975 
10976     """
10977     steps_total = 6
10979 # Step: check device activation
10980 self.lu.LogStep(1, steps_total, "Check device existence")
10981 self._CheckDisksExistence([self.other_node, self.target_node])
10982 self._CheckVolumeGroup([self.target_node, self.other_node])
10984 # Step: check other node consistency
10985 self.lu.LogStep(2, steps_total, "Check peer consistency")
10986 self._CheckDisksConsistency(self.other_node,
10987                                  self.other_node == self.instance.primary_node,
10988                                  False)
10990 # Step: create new storage
10991 self.lu.LogStep(3, steps_total, "Allocate new storage")
10992 iv_names = self._CreateNewStorage(self.target_node)
10994 # Step: for each lv, detach+rename*2+attach
10995 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10996 for dev, old_lvs, new_lvs in iv_names.itervalues():
10997 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10999       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11000                                                      old_lvs)
11001       result.Raise("Can't detach drbd from local storage on node"
11002 " %s for device %s" % (self.target_node, dev.iv_name))
11004 #cfg.Update(instance)
11006 # ok, we created the new LVs, so now we know we have the needed
11007 # storage; as such, we proceed on the target node to rename
11008 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11009 # using the assumption that logical_id == physical_id (which in
11010 # turn is the unique_id on that node)
11012 # FIXME(iustin): use a better name for the replaced LVs
11013 temp_suffix = int(time.time())
11014 ren_fn = lambda d, suff: (d.physical_id[0],
11015 d.physical_id[1] + "_replaced-%s" % suff)
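      # Illustrative example (names/timestamps assumed): with temp_suffix
      # 1400000000, an old LV ("xenvg", "<uuid>.disk0_data") is renamed to
      # ("xenvg", "<uuid>.disk0_data_replaced-1400000000") before the new
      # LV takes over the original name.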
11017 # Build the rename list based on what LVs exist on the node
11018 rename_old_to_new = []
11019 for to_ren in old_lvs:
11020 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11021 if not result.fail_msg and result.payload:
11023 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11025 self.lu.LogInfo("Renaming the old LVs on the target node")
11026 result = self.rpc.call_blockdev_rename(self.target_node,
11027 rename_old_to_new)
11028 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11030 # Now we rename the new LVs to the old LVs
11031 self.lu.LogInfo("Renaming the new LVs on the target node")
11032 rename_new_to_old = [(new, old.physical_id)
11033 for old, new in zip(old_lvs, new_lvs)]
11034 result = self.rpc.call_blockdev_rename(self.target_node,
11035 rename_new_to_old)
11036 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11038 # Intermediate steps of in-memory modifications
11039 for old, new in zip(old_lvs, new_lvs):
11040 new.logical_id = old.logical_id
11041 self.cfg.SetDiskID(new, self.target_node)
11043 # We need to modify old_lvs so that removal later removes the
11044 # right LVs, not the newly added ones; note that old_lvs is a
11045 # copy here
11046 for disk in old_lvs:
11047 disk.logical_id = ren_fn(disk, temp_suffix)
11048 self.cfg.SetDiskID(disk, self.target_node)
11050 # Now that the new lvs have the old name, we can add them to the device
11051 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11052 result = self.rpc.call_blockdev_addchildren(self.target_node,
11053 (dev, self.instance), new_lvs)
11054 msg = result.fail_msg
11055 if msg:
11056 for new_lv in new_lvs:
11057 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11058 new_lv).fail_msg
11059 if msg2:
11060 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11061 hint=("cleanup manually the unused logical"
11062 " volumes"))
11063 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11065 cstep = itertools.count(5)
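# itertools.count(5) hands out the remaining step numbers (5 and 6), so the
# early-release and the regular path below number their LogStep calls
# consistently regardless of which branch removes the old storage first.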
11067 if self.early_release:
11068 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11069 self._RemoveOldStorage(self.target_node, iv_names)
11070 # TODO: Check if releasing locks early still makes sense
11071 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11072 else:
11073 # Release all resource locks except those used by the instance
11074 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11075 keep=self.node_secondary_ip.keys())
11077 # Release all node locks while waiting for sync
11078 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11080 # TODO: Can the instance lock be downgraded here? Take the optional disk
11081 # shutdown in the caller into consideration.
11084 # This can fail as the old devices are degraded and _WaitForSync
11085 # does a combined result over all disks, so we don't check its return value
11086 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11087 _WaitForSync(self.lu, self.instance)
11089 # Check all devices manually
11090 self._CheckDevices(self.instance.primary_node, iv_names)
11092 # Step: remove old storage
11093 if not self.early_release:
11094 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11095 self._RemoveOldStorage(self.target_node, iv_names)
11097 def _ExecDrbd8Secondary(self, feedback_fn):
11098 """Replace the secondary node for DRBD 8.
11100 The algorithm for replace is quite complicated:
11101 - for all disks of the instance:
11102 - create new LVs on the new node with same names
11103 - shutdown the drbd device on the old secondary
11104 - disconnect the drbd network on the primary
11105 - create the drbd device on the new secondary
11106 - network attach the drbd on the primary, using an artifice:
11107 the drbd code for Attach() will connect to the network if it
11108 finds a device which is connected to the good local disks but
11109 not network enabled
11110 - wait for sync across all devices
11111 - remove all disks from the old secondary
11113 Failures are not very well handled.
11115 """
11116 steps_total = 6
11118 pnode = self.instance.primary_node
11120 # Step: check device activation
11121 self.lu.LogStep(1, steps_total, "Check device existence")
11122 self._CheckDisksExistence([self.instance.primary_node])
11123 self._CheckVolumeGroup([self.instance.primary_node])
11125 # Step: check other node consistency
11126 self.lu.LogStep(2, steps_total, "Check peer consistency")
11127 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11129 # Step: create new storage
11130 self.lu.LogStep(3, steps_total, "Allocate new storage")
11131 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11132 for idx, dev in enumerate(disks):
11133 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11134 (self.new_node, idx))
11135 # we pass force_create=True to force LVM creation
11136 for new_lv in dev.children:
11137 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11138 True, _GetInstanceInfoText(self.instance), False)
11140 # Step 4: drbd minors and drbd setup changes
11141 # after this, we must manually remove the drbd minors on both the
11142 # error and the success paths
11143 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11144 minors = self.cfg.AllocateDRBDMinor([self.new_node
11145 for dev in self.instance.disks],
11146 self.instance.name)
11147 logging.debug("Allocated minors %r", minors)
11149 iv_names = {}
11150 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11151 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11152 (self.new_node, idx))
11153 # create new devices on new_node; note that we create two IDs:
11154 # one without port, so the drbd will be activated without
11155 # networking information on the new node at this stage, and one
11156 # with network, for the latter activation in step 4
11157 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11158 if self.instance.primary_node == o_node1:
11159 p_minor = o_minor1
11160 else:
11161 assert self.instance.primary_node == o_node2, "Three-node instance?"
11162 p_minor = o_minor2
11164 new_alone_id = (self.instance.primary_node, self.new_node, None,
11165 p_minor, new_minor, o_secret)
11166 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11167 p_minor, new_minor, o_secret)
11169 iv_names[idx] = (dev, dev.children, new_net_id)
11170 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11171 new_net_id)
11172 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11173 logical_id=new_alone_id,
11174 children=dev.children,
11175 size=dev.size,
11176 params={})
11177 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11178 self.cfg)
11179 try:
11180 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11181 anno_new_drbd,
11182 _GetInstanceInfoText(self.instance), False)
11183 except errors.GenericError:
11184 self.cfg.ReleaseDRBDMinors(self.instance.name)
11185 raise
11187 # We have new devices, shutdown the drbd on the old secondary
11188 for idx, dev in enumerate(self.instance.disks):
11189 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11190 self.cfg.SetDiskID(dev, self.target_node)
11191 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11192 (dev, self.instance)).fail_msg
11193 if msg:
11194 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11195 " node: %s" % (idx, msg),
11196 hint=("Please cleanup this device manually as"
11197 " soon as possible"))
11199 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11200 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11201 self.instance.disks)[pnode]
11203 msg = result.fail_msg
11204 if msg:
11205 # detaches didn't succeed (unlikely)
11206 self.cfg.ReleaseDRBDMinors(self.instance.name)
11207 raise errors.OpExecError("Can't detach the disks from the network on"
11208 " old node: %s" % (msg,))
11210 # if we managed to detach at least one, we update all the disks of
11211 # the instance to point to the new secondary
11212 self.lu.LogInfo("Updating instance configuration")
11213 for dev, _, new_logical_id in iv_names.itervalues():
11214 dev.logical_id = new_logical_id
11215 self.cfg.SetDiskID(dev, self.instance.primary_node)
11217 self.cfg.Update(self.instance, feedback_fn)
11219 # Release all node locks (the configuration has been updated)
11220 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11222 # and now perform the drbd attach
11223 self.lu.LogInfo("Attaching primary drbds to new secondary"
11224 " (standalone => connected)")
11225 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11226 self.new_node],
11227 self.node_secondary_ip,
11228 (self.instance.disks, self.instance),
11229 self.instance.name,
11230 False)
11231 for to_node, to_result in result.items():
11232 msg = to_result.fail_msg
11233 if msg:
11234 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11235 to_node, msg,
11236 hint=("please do a gnt-instance info to see the"
11237 " status of disks"))
11239 cstep = itertools.count(5)
11241 if self.early_release:
11242 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11243 self._RemoveOldStorage(self.target_node, iv_names)
11244 # TODO: Check if releasing locks early still makes sense
11245 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11246 else:
11247 # Release all resource locks except those used by the instance
11248 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11249 keep=self.node_secondary_ip.keys())
11251 # TODO: Can the instance lock be downgraded here? Take the optional disk
11252 # shutdown in the caller into consideration.
11255 # This can fail as the old devices are degraded and _WaitForSync
11256 # does a combined result over all disks, so we don't check its return value
11257 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11258 _WaitForSync(self.lu, self.instance)
11260 # Check all devices manually
11261 self._CheckDevices(self.instance.primary_node, iv_names)
11263 # Step: remove old storage
11264 if not self.early_release:
11265 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11266 self._RemoveOldStorage(self.target_node, iv_names)
11269 class LURepairNodeStorage(NoHooksLU):
11270 """Repairs the volume group on a node.
11275 def CheckArguments(self):
11276 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11278 storage_type = self.op.storage_type
11280 if (constants.SO_FIX_CONSISTENCY not in
11281 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11282 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11283 " repaired" % storage_type,
11284 errors.ECODE_INVAL)
11286 def ExpandNames(self):
11287 self.needed_locks = {
11288 locking.LEVEL_NODE: [self.op.node_name],
11289 }
11291 def _CheckFaultyDisks(self, instance, node_name):
11292 """Ensure faulty disks abort the opcode or at least warn."""
11293 try:
11294 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11295 node_name, True):
11296 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11297 " node '%s'" % (instance.name, node_name),
11298 errors.ECODE_STATE)
11299 except errors.OpPrereqError, err:
11300 if self.op.ignore_consistency:
11301 self.proc.LogWarning(str(err.args[0]))
11302 else:
11303 raise
11305 def CheckPrereq(self):
11306 """Check prerequisites.
11309 # Check whether any instance on this node has faulty disks
11310 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11311 if inst.admin_state != constants.ADMINST_UP:
11312 continue
11313 check_nodes = set(inst.all_nodes)
11314 check_nodes.discard(self.op.node_name)
11315 for inst_node_name in check_nodes:
11316 self._CheckFaultyDisks(inst, inst_node_name)
11318 def Exec(self, feedback_fn):
11319 feedback_fn("Repairing storage unit '%s' on %s ..." %
11320 (self.op.name, self.op.node_name))
11322 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11323 result = self.rpc.call_storage_execute(self.op.node_name,
11324 self.op.storage_type, st_args,
11325 self.op.name,
11326 constants.SO_FIX_CONSISTENCY)
11327 result.Raise("Failed to repair storage unit '%s' on %s" %
11328 (self.op.name, self.op.node_name))
11331 class LUNodeEvacuate(NoHooksLU):
11332 """Evacuates instances off a list of nodes.
11337 _MODE2IALLOCATOR = {
11338 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11339 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11340 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11341 }
11342 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11343 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11344 constants.IALLOCATOR_NEVAC_MODES)
11346 def CheckArguments(self):
11347 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11349 def ExpandNames(self):
11350 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11352 if self.op.remote_node is not None:
11353 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11354 assert self.op.remote_node
11356 if self.op.remote_node == self.op.node_name:
11357 raise errors.OpPrereqError("Can not use evacuated node as a new"
11358 " secondary node", errors.ECODE_INVAL)
11360 if self.op.mode != constants.NODE_EVAC_SEC:
11361 raise errors.OpPrereqError("Without the use of an iallocator only"
11362 " secondary instances can be evacuated",
11363 errors.ECODE_INVAL)
11366 self.share_locks = _ShareAll()
11367 self.needed_locks = {
11368 locking.LEVEL_INSTANCE: [],
11369 locking.LEVEL_NODEGROUP: [],
11370 locking.LEVEL_NODE: [],
11373 # Determine nodes (via group) optimistically, needs verification once locks
11374 # have been acquired
11375 self.lock_nodes = self._DetermineNodes()
11377 def _DetermineNodes(self):
11378 """Gets the list of nodes to operate on.
11381 if self.op.remote_node is None:
11382 # Iallocator will choose any node(s) in the same group
11383 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11384 else:
11385 group_nodes = frozenset([self.op.remote_node])
11387 # Determine nodes to be locked
11388 return set([self.op.node_name]) | group_nodes
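# Note that this set is computed before any locks are held; CheckPrereq
# recomputes it after locking and aborts if the node list changed in the
# meantime (the optimistic-locking pattern used throughout this module).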
11390 def _DetermineInstances(self):
11391 """Builds list of instances to operate on.
11394 assert self.op.mode in constants.NODE_EVAC_MODES
11396 if self.op.mode == constants.NODE_EVAC_PRI:
11397 # Primary instances only
11398 inst_fn = _GetNodePrimaryInstances
11399 assert self.op.remote_node is None, \
11400 "Evacuating primary instances requires iallocator"
11401 elif self.op.mode == constants.NODE_EVAC_SEC:
11402 # Secondary instances only
11403 inst_fn = _GetNodeSecondaryInstances
11404 else:
11405 # All instances
11406 assert self.op.mode == constants.NODE_EVAC_ALL
11407 inst_fn = _GetNodeInstances
11408 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11409 # per instance
11410 raise errors.OpPrereqError("Due to an issue with the iallocator"
11411 " interface it is not possible to evacuate"
11412 " all instances at once; specify explicitly"
11413 " whether to evacuate primary or secondary"
11415 errors.ECODE_INVAL)
11417 return inst_fn(self.cfg, self.op.node_name)
11419 def DeclareLocks(self, level):
11420 if level == locking.LEVEL_INSTANCE:
11421 # Lock instances optimistically, needs verification once node and group
11422 # locks have been acquired
11423 self.needed_locks[locking.LEVEL_INSTANCE] = \
11424 set(i.name for i in self._DetermineInstances())
11426 elif level == locking.LEVEL_NODEGROUP:
11427 # Lock node groups for all potential target nodes optimistically, needs
11428 # verification once nodes have been acquired
11429 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11430 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11432 elif level == locking.LEVEL_NODE:
11433 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11435 def CheckPrereq(self):
11437 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11438 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11439 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11441 need_nodes = self._DetermineNodes()
11443 if not owned_nodes.issuperset(need_nodes):
11444 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11445 " locks were acquired, current nodes are"
11446 " are '%s', used to be '%s'; retry the"
11448 (self.op.node_name,
11449 utils.CommaJoin(need_nodes),
11450 utils.CommaJoin(owned_nodes)),
11451 errors.ECODE_STATE)
11453 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11454 if owned_groups != wanted_groups:
11455 raise errors.OpExecError("Node groups changed since locks were acquired,"
11456 " current groups are '%s', used to be '%s';"
11457 " retry the operation" %
11458 (utils.CommaJoin(wanted_groups),
11459 utils.CommaJoin(owned_groups)))
11461 # Determine affected instances
11462 self.instances = self._DetermineInstances()
11463 self.instance_names = [i.name for i in self.instances]
11465 if set(self.instance_names) != owned_instances:
11466 raise errors.OpExecError("Instances on node '%s' changed since locks"
11467 " were acquired, current instances are '%s',"
11468 " used to be '%s'; retry the operation" %
11469 (self.op.node_name,
11470 utils.CommaJoin(self.instance_names),
11471 utils.CommaJoin(owned_instances)))
11473 if self.instance_names:
11474 self.LogInfo("Evacuating instances from node '%s': %s",
11476 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11478 self.LogInfo("No instances to evacuate from node '%s'",
11481 if self.op.remote_node is not None:
11482 for i in self.instances:
11483 if i.primary_node == self.op.remote_node:
11484 raise errors.OpPrereqError("Node %s is the primary node of"
11485 " instance %s, cannot use it as"
11487 (self.op.remote_node, i.name),
11488 errors.ECODE_INVAL)
11490 def Exec(self, feedback_fn):
11491 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11493 if not self.instance_names:
11494 # No instances to evacuate
11495 jobs = []
11497 elif self.op.iallocator is not None:
11498 # TODO: Implement relocation to other group
11499 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11500 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11501 instances=list(self.instance_names))
11503 ial.Run(self.op.iallocator)
11505 if not ial.success:
11506 raise errors.OpPrereqError("Can't compute node evacuation using"
11507 " iallocator '%s': %s" %
11508 (self.op.iallocator, ial.info),
11509 errors.ECODE_NORES)
11511 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11513 elif self.op.remote_node is not None:
11514 assert self.op.mode == constants.NODE_EVAC_SEC
11515 jobs = [
11516 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11517 remote_node=self.op.remote_node,
11518 disks=[],
11519 mode=constants.REPLACE_DISK_CHG,
11520 early_release=self.op.early_release)]
11521 for instance_name in self.instance_names
11522 ]
11524 else:
11525 raise errors.ProgrammerError("No iallocator or remote node")
11527 return ResultWithJobs(jobs)
11530 def _SetOpEarlyRelease(early_release, op):
11531 """Sets C{early_release} flag on opcodes if available.
11535 op.early_release = early_release
11536 except AttributeError:
11537 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11539 return op
11542 def _NodeEvacDest(use_nodes, group, nodes):
11543 """Returns group or nodes depending on caller's choice.
11547 return utils.CommaJoin(nodes)
11548 else:
11549 return group
11552 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11553 """Unpacks the result of change-group and node-evacuate iallocator requests.
11555 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11556 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11558 @type lu: L{LogicalUnit}
11559 @param lu: Logical unit instance
11560 @type alloc_result: tuple/list
11561 @param alloc_result: Result from iallocator
11562 @type early_release: bool
11563 @param early_release: Whether to release locks early if possible
11564 @type use_nodes: bool
11565 @param use_nodes: Whether to display node names instead of groups
11567 """
11568 (moved, failed, jobs) = alloc_result
11570 if failed:
11571 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11572 for (name, reason) in failed)
11573 lu.LogWarning("Unable to evacuate instances %s", failreason)
11574 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11576 if moved:
11577 lu.LogInfo("Instances to be moved: %s",
11578 utils.CommaJoin("%s (to %s)" %
11579 (name, _NodeEvacDest(use_nodes, group, nodes))
11580 for (name, group, nodes) in moved))
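# Each entry in C{jobs} is itself a list of serialized opcodes forming one
# job; the nested map below deserializes every opcode and, where the opcode
# supports it, propagates the early_release flag.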
11582 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11583 map(opcodes.OpCode.LoadOpCode, ops))
11584 for ops in jobs]
11587 class LUInstanceGrowDisk(LogicalUnit):
11588 """Grow a disk of an instance.
11591 HPATH = "disk-grow"
11592 HTYPE = constants.HTYPE_INSTANCE
11593 REQ_BGL = False
11595 def ExpandNames(self):
11596 self._ExpandAndLockInstance()
11597 self.needed_locks[locking.LEVEL_NODE] = []
11598 self.needed_locks[locking.LEVEL_NODE_RES] = []
11599 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11600 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11602 def DeclareLocks(self, level):
11603 if level == locking.LEVEL_NODE:
11604 self._LockInstancesNodes()
11605 elif level == locking.LEVEL_NODE_RES:
11607 self.needed_locks[locking.LEVEL_NODE_RES] = \
11608 self.needed_locks[locking.LEVEL_NODE][:]
11610 def BuildHooksEnv(self):
11611 """Build hooks env.
11613 This runs on the master, the primary and all the secondaries.
11615 """
11616 env = {
11617 "DISK": self.op.disk,
11618 "AMOUNT": self.op.amount,
11619 "ABSOLUTE": self.op.absolute,
11621 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11622 return env
11624 def BuildHooksNodes(self):
11625 """Build hooks nodes.
11628 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11629 return (nl, nl)
11631 def CheckPrereq(self):
11632 """Check prerequisites.
11634 This checks that the instance is in the cluster.
11636 """
11637 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11638 assert instance is not None, \
11639 "Cannot retrieve locked instance %s" % self.op.instance_name
11640 nodenames = list(instance.all_nodes)
11641 for node in nodenames:
11642 _CheckNodeOnline(self, node)
11644 self.instance = instance
11646 if instance.disk_template not in constants.DTS_GROWABLE:
11647 raise errors.OpPrereqError("Instance's disk layout does not support"
11648 " growing", errors.ECODE_INVAL)
11650 self.disk = instance.FindDisk(self.op.disk)
11652 if self.op.absolute:
11653 self.target = self.op.amount
11654 self.delta = self.target - self.disk.size
11655 if self.delta < 0:
11656 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11657 "current disk size (%s)" %
11658 (utils.FormatUnit(self.target, "h"),
11659 utils.FormatUnit(self.disk.size, "h")),
11660 errors.ECODE_STATE)
11661 else:
11662 self.delta = self.op.amount
11663 self.target = self.disk.size + self.delta
11664 if self.delta < 0:
11665 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11666 utils.FormatUnit(self.delta, "h"),
11667 errors.ECODE_INVAL)
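# In both branches self.delta ends up as the size increase and self.target
# as the resulting disk size: with absolute=True op.amount is the final
# size, otherwise it is the increment itself.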
11669 if instance.disk_template not in (constants.DT_FILE,
11670 constants.DT_SHARED_FILE,
11671 constants.DT_RBD):
11672 # TODO: check the free disk space for file, when that feature will be
11673 # supported
11674 _CheckNodesFreeDiskPerVG(self, nodenames,
11675 self.disk.ComputeGrowth(self.delta))
11677 def Exec(self, feedback_fn):
11678 """Execute disk grow.
11681 instance = self.instance
11682 disk = self.disk
11684 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11685 assert (self.owned_locks(locking.LEVEL_NODE) ==
11686 self.owned_locks(locking.LEVEL_NODE_RES))
11688 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11689 if not disks_ok:
11690 raise errors.OpExecError("Cannot activate block device to grow")
11692 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11693 (self.op.disk, instance.name,
11694 utils.FormatUnit(self.delta, "h"),
11695 utils.FormatUnit(self.target, "h")))
11697 # First run all grow ops in dry-run mode
11698 for node in instance.all_nodes:
11699 self.cfg.SetDiskID(disk, node)
11700 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11701 True, True)
11702 result.Raise("Grow request failed to node %s" % node)
11704 # We know that (as far as we can test) operations across different
11705 # nodes will succeed, time to run it for real on the backing storage
11706 for node in instance.all_nodes:
11707 self.cfg.SetDiskID(disk, node)
11708 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11709 False, True)
11710 result.Raise("Grow request failed to node %s" % node)
11712 # And now execute it for logical storage, on the primary node
11713 node = instance.primary_node
11714 self.cfg.SetDiskID(disk, node)
11715 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11716 False, False)
11717 result.Raise("Grow request failed to node %s" % node)
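# The grow is thus staged: a dry-run on every node first, then the real
# resize of the backing storage everywhere, and only then the logical
# (device-level) grow on the primary node, so a node that would fail is
# caught before any on-disk state has changed.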
11719 disk.RecordGrow(self.delta)
11720 self.cfg.Update(instance, feedback_fn)
11722 # Changes have been recorded, release node lock
11723 _ReleaseLocks(self, locking.LEVEL_NODE)
11725 # Downgrade lock while waiting for sync
11726 self.glm.downgrade(locking.LEVEL_INSTANCE)
11728 if self.op.wait_for_sync:
11729 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11730 if disk_abort:
11731 self.proc.LogWarning("Disk sync-ing has not returned a good"
11732 " status; please check the instance")
11733 if instance.admin_state != constants.ADMINST_UP:
11734 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11735 elif instance.admin_state != constants.ADMINST_UP:
11736 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11737 " not supposed to be running because no wait for"
11738 " sync mode was requested")
11740 assert self.owned_locks(locking.LEVEL_NODE_RES)
11741 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11744 class LUInstanceQueryData(NoHooksLU):
11745 """Query runtime instance data.
11750 def ExpandNames(self):
11751 self.needed_locks = {}
11753 # Use locking if requested or when non-static information is wanted
11754 if not (self.op.static or self.op.use_locking):
11755 self.LogWarning("Non-static data requested, locks need to be acquired")
11756 self.op.use_locking = True
11758 if self.op.instances or not self.op.use_locking:
11759 # Expand instance names right here
11760 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11761 else:
11762 # Will use acquired locks
11763 self.wanted_names = None
11765 if self.op.use_locking:
11766 self.share_locks = _ShareAll()
11768 if self.wanted_names is None:
11769 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11770 else:
11771 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11773 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11774 self.needed_locks[locking.LEVEL_NODE] = []
11775 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11777 def DeclareLocks(self, level):
11778 if self.op.use_locking:
11779 if level == locking.LEVEL_NODEGROUP:
11780 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11782 # Lock all groups used by instances optimistically; this requires going
11783 # via the node before it's locked, requiring verification later on
11784 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11785 frozenset(group_uuid
11786 for instance_name in owned_instances
11787 for group_uuid in
11788 self.cfg.GetInstanceNodeGroups(instance_name))
11790 elif level == locking.LEVEL_NODE:
11791 self._LockInstancesNodes()
11793 def CheckPrereq(self):
11794 """Check prerequisites.
11796 This only checks the optional instance list against the existing names.
11798 """
11799 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11800 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11801 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11803 if self.wanted_names is None:
11804 assert self.op.use_locking, "Locking was not used"
11805 self.wanted_names = owned_instances
11807 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11809 if self.op.use_locking:
11810 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11811 None)
11812 else:
11813 assert not (owned_instances or owned_groups or owned_nodes)
11815 self.wanted_instances = instances.values()
11817 def _ComputeBlockdevStatus(self, node, instance, dev):
11818 """Returns the status of a block device
11821 if self.op.static or not node:
11822 return None
11824 self.cfg.SetDiskID(dev, node)
11826 result = self.rpc.call_blockdev_find(node, dev)
11827 if result.offline:
11828 return None
11830 result.Raise("Can't compute disk status for %s" % instance.name)
11832 status = result.payload
11833 if status is None:
11834 return None
11836 return (status.dev_path, status.major, status.minor,
11837 status.sync_percent, status.estimated_time,
11838 status.is_degraded, status.ldisk_status)
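# The tuple flattens the status payload (device path, major/minor numbers,
# sync progress, degradation and ldisk state) so callers can consume it
# positionally; the field order therefore matters.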
11840 def _ComputeDiskStatus(self, instance, snode, dev):
11841 """Compute block device status.
11844 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11846 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11848 def _ComputeDiskStatusInner(self, instance, snode, dev):
11849 """Compute block device status.
11851 @attention: The device has to be annotated already.
11853 """
11854 if dev.dev_type in constants.LDS_DRBD:
11855 # we change the snode then (otherwise we use the one passed in)
11856 if dev.logical_id[0] == instance.primary_node:
11857 snode = dev.logical_id[1]
11858 else:
11859 snode = dev.logical_id[0]
11861 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11862 instance, dev)
11863 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11865 if dev.children:
11866 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11867 instance, snode),
11868 dev.children)
11869 else:
11870 dev_children = []
11872 return {
11873 "iv_name": dev.iv_name,
11874 "dev_type": dev.dev_type,
11875 "logical_id": dev.logical_id,
11876 "physical_id": dev.physical_id,
11877 "pstatus": dev_pstatus,
11878 "sstatus": dev_sstatus,
11879 "children": dev_children,
11884 def Exec(self, feedback_fn):
11885 """Gather and return data"""
11888 cluster = self.cfg.GetClusterInfo()
11890 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11891 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11893 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11894 for node in nodes.values()))
11896 group2name_fn = lambda uuid: groups[uuid].name
11898 for instance in self.wanted_instances:
11899 pnode = nodes[instance.primary_node]
11901 if self.op.static or pnode.offline:
11902 remote_state = None
11903 if pnode.offline:
11904 self.LogWarning("Primary node %s is marked offline, returning static"
11905 " information only for instance %s" %
11906 (pnode.name, instance.name))
11907 else:
11908 remote_info = self.rpc.call_instance_info(instance.primary_node,
11909 instance.name,
11910 instance.hypervisor)
11911 remote_info.Raise("Error checking node %s" % instance.primary_node)
11912 remote_info = remote_info.payload
11913 if remote_info and "state" in remote_info:
11914 remote_state = "up"
11916 if instance.admin_state == constants.ADMINST_UP:
11917 remote_state = "down"
11919 remote_state = instance.admin_state
11921 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11922 instance.disks)
11924 snodes_group_uuids = [nodes[snode_name].group
11925 for snode_name in instance.secondary_nodes]
11927 result[instance.name] = {
11928 "name": instance.name,
11929 "config_state": instance.admin_state,
11930 "run_state": remote_state,
11931 "pnode": instance.primary_node,
11932 "pnode_group_uuid": pnode.group,
11933 "pnode_group_name": group2name_fn(pnode.group),
11934 "snodes": instance.secondary_nodes,
11935 "snodes_group_uuids": snodes_group_uuids,
11936 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11938 # this happens to be the same format used for hooks
11939 "nics": _NICListToTuple(self, instance.nics),
11940 "disk_template": instance.disk_template,
11942 "hypervisor": instance.hypervisor,
11943 "network_port": instance.network_port,
11944 "hv_instance": instance.hvparams,
11945 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11946 "be_instance": instance.beparams,
11947 "be_actual": cluster.FillBE(instance),
11948 "os_instance": instance.osparams,
11949 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11950 "serial_no": instance.serial_no,
11951 "mtime": instance.mtime,
11952 "ctime": instance.ctime,
11953 "uuid": instance.uuid,
11959 def PrepareContainerMods(mods, private_fn):
11960 """Prepares a list of container modifications by adding a private data field.
11962 @type mods: list of tuples; (operation, index, parameters)
11963 @param mods: List of modifications
11964 @type private_fn: callable or None
11965 @param private_fn: Callable for constructing a private data field for a
11966 modification
11967 @rtype: list
11969 """
11970 if private_fn is None:
11971 fn = lambda: None
11972 else:
11973 fn = private_fn
11975 return [(op, idx, params, fn()) for (op, idx, params) in mods]
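# For example, with private_fn=_InstNicModPrivate every (op, idx, params)
# tuple gains a fresh _InstNicModPrivate instance as scratch space, which
# the prepare/apply callbacks of LUInstanceSetParams fill in later.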
11978 #: Type description for changes as returned by L{ApplyContainerMods}'s
11979 #: callbacks
11980 _TApplyContModsCbChanges = \
11981 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11982 ht.TNonEmptyString,
11983 ht.TAny,
11984 ])))
11987 def ApplyContainerMods(kind, container, chgdesc, mods,
11988 create_fn, modify_fn, remove_fn):
11989 """Applies descriptions in C{mods} to C{container}.
11992 @param kind: One-word item description
11993 @type container: list
11994 @param container: Container to modify
11995 @type chgdesc: None or list
11996 @param chgdesc: List of applied changes
11997 @type mods: list
11998 @param mods: Modifications as returned by L{PrepareContainerMods}
11999 @type create_fn: callable
12000 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12001 receives absolute item index, parameters and private data object as added
12002 by L{PrepareContainerMods}, returns tuple containing new item and changes
12003 applied to it
12004 @type modify_fn: callable
12005 @param modify_fn: Callback for modifying an existing item
12006 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12007 and private data object as added by L{PrepareContainerMods}, returns
12008 changes applied to it
12009 @type remove_fn: callable
12010 @param remove_fn: Callback on removing item; receives absolute item index,
12011 item and private data object as added by L{PrepareContainerMods}
12013 """
12014 for (op, idx, params, private) in mods:
12015 if idx == -1:
12016 # Append
12017 absidx = len(container) - 1
12018 elif idx < 0:
12019 raise IndexError("Not accepting negative indices other than -1")
12020 elif idx > len(container):
12021 raise IndexError("Got %s index %s, but there are only %s" %
12022 (kind, idx, len(container)))
12023 else:
12024 absidx = idx
12026 changes = None
12028 if op == constants.DDM_ADD:
12029 # Calculate where item will be added
12030 if idx == -1:
12031 addidx = len(container)
12032 else:
12033 addidx = idx
12035 if create_fn is None:
12036 item = params
12037 else:
12038 (item, changes) = create_fn(addidx, params, private)
12040 if idx == -1:
12041 container.append(item)
12042 else:
12044 assert idx <= len(container)
12045 # list.insert does so before the specified index
12046 container.insert(idx, item)
12047 else:
12048 # Retrieve existing item
12049 try:
12050 item = container[absidx]
12051 except IndexError:
12052 raise IndexError("Invalid %s index %s" % (kind, idx))
12054 if op == constants.DDM_REMOVE:
12055 assert not params
12057 if remove_fn is not None:
12058 remove_fn(absidx, item, private)
12060 changes = [("%s/%s" % (kind, absidx), "remove")]
12062 assert container[absidx] == item
12063 del container[absidx]
12064 elif op == constants.DDM_MODIFY:
12065 if modify_fn is not None:
12066 changes = modify_fn(absidx, item, params, private)
12067 else:
12068 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12070 assert _TApplyContModsCbChanges(changes)
12072 if not (chgdesc is None or changes is None):
12073 chgdesc.extend(changes)
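# A minimal sketch of the calling convention (hypothetical callback names,
# for illustration only): the create/modify callbacks return the per-item
# change lists that end up in chgdesc, e.g.
#   mods = PrepareContainerMods([(constants.DDM_MODIFY, 0, {"mode": "ro"})],
#                               None)
#   ApplyContainerMods("disk", disks, chgdesc, mods, None, modify_cb, None)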
12076 def _UpdateIvNames(base_index, disks):
12077 """Updates the C{iv_name} attribute of disks.
12079 @type disks: list of L{objects.Disk}
12081 """
12082 for (idx, disk) in enumerate(disks):
12083 disk.iv_name = "disk/%s" % (base_index + idx, )
12086 class _InstNicModPrivate:
12087 """Data structure for network interface modifications.
12089 Used by L{LUInstanceSetParams}.
12091 """
12092 def __init__(self):
12093 self.params = None
12094 self.filled = None
12097 class LUInstanceSetParams(LogicalUnit):
12098 """Modifies an instances's parameters.
12101 HPATH = "instance-modify"
12102 HTYPE = constants.HTYPE_INSTANCE
12103 REQ_BGL = False
12105 @staticmethod
12106 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12107 assert ht.TList(mods)
12108 assert not mods or len(mods[0]) in (2, 3)
12110 if mods and len(mods[0]) == 2:
12111 result = []
12113 addremove = 0
12114 for op, params in mods:
12115 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12116 result.append((op, -1, params))
12117 addremove += 1
12119 if addremove > 1:
12120 raise errors.OpPrereqError("Only one %s add or remove operation is"
12121 " supported at a time" % kind,
12122 errors.ECODE_INVAL)
12123 else:
12124 result.append((constants.DDM_MODIFY, op, params))
12126 assert verify_fn(result)
12127 else:
12128 result = mods
12130 return result
12132 @staticmethod
12133 def _CheckMods(kind, mods, key_types, item_fn):
12134 """Ensures requested disk/NIC modifications are valid.
12137 for (op, _, params) in mods:
12138 assert ht.TDict(params)
12140 utils.ForceDictType(params, key_types)
12142 if op == constants.DDM_REMOVE:
12143 if params:
12144 raise errors.OpPrereqError("No settings should be passed when"
12145 " removing a %s" % kind,
12146 errors.ECODE_INVAL)
12147 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12148 item_fn(op, params)
12150 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12152 @staticmethod
12153 def _VerifyDiskModification(op, params):
12154 """Verifies a disk modification.
12157 if op == constants.DDM_ADD:
12158 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12159 if mode not in constants.DISK_ACCESS_SET:
12160 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12161 errors.ECODE_INVAL)
12163 size = params.get(constants.IDISK_SIZE, None)
12164 if size is None:
12165 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12166 constants.IDISK_SIZE, errors.ECODE_INVAL)
12168 try:
12169 size = int(size)
12170 except (TypeError, ValueError), err:
12171 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12172 errors.ECODE_INVAL)
12174 params[constants.IDISK_SIZE] = size
12176 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12177 raise errors.OpPrereqError("Disk size change not possible, use"
12178 " grow-disk", errors.ECODE_INVAL)
12181 def _VerifyNicModification(op, params):
12182 """Verifies a network interface modification.
12185 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12186 ip = params.get(constants.INIC_IP, None)
12187 if ip is None:
12188 pass
12189 elif ip.lower() == constants.VALUE_NONE:
12190 params[constants.INIC_IP] = None
12191 elif not netutils.IPAddress.IsValid(ip):
12192 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12193 errors.ECODE_INVAL)
12195 bridge = params.get("bridge", None)
12196 link = params.get(constants.INIC_LINK, None)
12197 if bridge and link:
12198 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12199 " at the same time", errors.ECODE_INVAL)
12200 elif bridge and bridge.lower() == constants.VALUE_NONE:
12201 params["bridge"] = None
12202 elif link and link.lower() == constants.VALUE_NONE:
12203 params[constants.INIC_LINK] = None
12205 if op == constants.DDM_ADD:
12206 macaddr = params.get(constants.INIC_MAC, None)
12207 if macaddr is None:
12208 params[constants.INIC_MAC] = constants.VALUE_AUTO
12210 if constants.INIC_MAC in params:
12211 macaddr = params[constants.INIC_MAC]
12212 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12213 macaddr = utils.NormalizeAndValidateMac(macaddr)
12215 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12216 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12217 " modifying an existing NIC",
12218 errors.ECODE_INVAL)
12220 def CheckArguments(self):
12221 if not (self.op.nics or self.op.disks or self.op.disk_template or
12222 self.op.hvparams or self.op.beparams or self.op.os_name or
12223 self.op.offline is not None or self.op.runtime_mem):
12224 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12226 if self.op.hvparams:
12227 _CheckGlobalHvParams(self.op.hvparams)
12229 self.op.disks = \
12230 self._UpgradeDiskNicMods("disk", self.op.disks,
12231 opcodes.OpInstanceSetParams.TestDiskModifications)
12232 self.op.nics = \
12233 self._UpgradeDiskNicMods("NIC", self.op.nics,
12234 opcodes.OpInstanceSetParams.TestNicModifications)
12236 # Check disk modifications
12237 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12238 self._VerifyDiskModification)
12240 if self.op.disks and self.op.disk_template is not None:
12241 raise errors.OpPrereqError("Disk template conversion and other disk"
12242 " changes not supported at the same time",
12243 errors.ECODE_INVAL)
12245 if (self.op.disk_template and
12246 self.op.disk_template in constants.DTS_INT_MIRROR and
12247 self.op.remote_node is None):
12248 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12249 " one requires specifying a secondary node",
12250 errors.ECODE_INVAL)
12252 # Check NIC modifications
12253 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12254 self._VerifyNicModification)
12256 def ExpandNames(self):
12257 self._ExpandAndLockInstance()
12258 # Can't even acquire node locks in shared mode as upcoming changes in
12259 # Ganeti 2.6 will start to modify the node object on disk conversion
12260 self.needed_locks[locking.LEVEL_NODE] = []
12261 self.needed_locks[locking.LEVEL_NODE_RES] = []
12262 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12264 def DeclareLocks(self, level):
12265 # TODO: Acquire group lock in shared mode (disk parameters)
12266 if level == locking.LEVEL_NODE:
12267 self._LockInstancesNodes()
12268 if self.op.disk_template and self.op.remote_node:
12269 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12270 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12271 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12273 self.needed_locks[locking.LEVEL_NODE_RES] = \
12274 self.needed_locks[locking.LEVEL_NODE][:]
12276 def BuildHooksEnv(self):
12277 """Build hooks env.
12279 This runs on the master, primary and secondaries.
12281 """
12282 args = {}
12283 if constants.BE_MINMEM in self.be_new:
12284 args["minmem"] = self.be_new[constants.BE_MINMEM]
12285 if constants.BE_MAXMEM in self.be_new:
12286 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12287 if constants.BE_VCPUS in self.be_new:
12288 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12289 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12290 # information at all.
12292 if self._new_nics is not None:
12293 nics = []
12295 for nic in self._new_nics:
12296 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12297 mode = nicparams[constants.NIC_MODE]
12298 link = nicparams[constants.NIC_LINK]
12299 nics.append((nic.ip, nic.mac, mode, link))
12301 args["nics"] = nics
12303 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12304 if self.op.disk_template:
12305 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12306 if self.op.runtime_mem:
12307 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12311 def BuildHooksNodes(self):
12312 """Build hooks nodes.
12315 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12316 return (nl, nl)
12318 def _PrepareNicModification(self, params, private, old_ip, old_params,
12319 cluster, pnode):
12320 update_params_dict = dict([(key, params[key])
12321 for key in constants.NICS_PARAMETERS
12322 if key in params])
12324 if "bridge" in params:
12325 update_params_dict[constants.NIC_LINK] = params["bridge"]
12327 new_params = _GetUpdatedParams(old_params, update_params_dict)
12328 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12330 new_filled_params = cluster.SimpleFillNIC(new_params)
12331 objects.NIC.CheckParameterSyntax(new_filled_params)
12333 new_mode = new_filled_params[constants.NIC_MODE]
12334 if new_mode == constants.NIC_MODE_BRIDGED:
12335 bridge = new_filled_params[constants.NIC_LINK]
12336 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12337 if msg:
12338 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12339 if self.op.force:
12340 self.warn.append(msg)
12341 else:
12342 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12344 elif new_mode == constants.NIC_MODE_ROUTED:
12345 ip = params.get(constants.INIC_IP, old_ip)
12346 if ip is None:
12347 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12348 " on a routed NIC", errors.ECODE_INVAL)
12350 if constants.INIC_MAC in params:
12351 mac = params[constants.INIC_MAC]
12352 if mac is None:
12353 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12354 errors.ECODE_INVAL)
12355 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12356 # otherwise generate the MAC address
12357 params[constants.INIC_MAC] = \
12358 self.cfg.GenerateMAC(self.proc.GetECId())
12359 else:
12360 # or validate/reserve the current one
12361 try:
12362 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12363 except errors.ReservationError:
12364 raise errors.OpPrereqError("MAC address '%s' already in use"
12365 " in cluster" % mac,
12366 errors.ECODE_NOTUNIQUE)
12368 private.params = new_params
12369 private.filled = new_filled_params
12371 def CheckPrereq(self):
12372 """Check prerequisites.
12374 This only checks the instance list against the existing names.
12376 """
12377 # checking the new params on the primary/secondary nodes
12379 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12380 cluster = self.cluster = self.cfg.GetClusterInfo()
12381 assert self.instance is not None, \
12382 "Cannot retrieve locked instance %s" % self.op.instance_name
12383 pnode = instance.primary_node
12384 nodelist = list(instance.all_nodes)
12385 pnode_info = self.cfg.GetNodeInfo(pnode)
12386 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12388 # Prepare disk/NIC modifications
12389 self.diskmod = PrepareContainerMods(self.op.disks, None)
12390 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12393 if self.op.os_name and not self.op.force:
12394 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12395 self.op.force_variant)
12396 instance_os = self.op.os_name
12397 else:
12398 instance_os = instance.os
12400 assert not (self.op.disk_template and self.op.disks), \
12401 "Can't modify disk template and apply disk changes at the same time"
12403 if self.op.disk_template:
12404 if instance.disk_template == self.op.disk_template:
12405 raise errors.OpPrereqError("Instance already has disk template %s" %
12406 instance.disk_template, errors.ECODE_INVAL)
12408 if (instance.disk_template,
12409 self.op.disk_template) not in self._DISK_CONVERSIONS:
12410 raise errors.OpPrereqError("Unsupported disk template conversion from"
12411 " %s to %s" % (instance.disk_template,
12412 self.op.disk_template),
12413 errors.ECODE_INVAL)
12414 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12415 msg="cannot change disk template")
12416 if self.op.disk_template in constants.DTS_INT_MIRROR:
12417 if self.op.remote_node == pnode:
12418 raise errors.OpPrereqError("Given new secondary node %s is the same"
12419 " as the primary node of the instance" %
12420 self.op.remote_node, errors.ECODE_STATE)
12421 _CheckNodeOnline(self, self.op.remote_node)
12422 _CheckNodeNotDrained(self, self.op.remote_node)
12423 # FIXME: here we assume that the old instance type is DT_PLAIN
12424 assert instance.disk_template == constants.DT_PLAIN
12425 disks = [{constants.IDISK_SIZE: d.size,
12426 constants.IDISK_VG: d.logical_id[0]}
12427 for d in instance.disks]
12428 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12429 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12431 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12432 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12433 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12434 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12435 ignore=self.op.ignore_ipolicy)
12436 if pnode_info.group != snode_info.group:
12437 self.LogWarning("The primary and secondary nodes are in two"
12438 " different node groups; the disk parameters"
12439 " from the first disk's node group will be"
12442 # hvparams processing
12443 if self.op.hvparams:
12444 hv_type = instance.hypervisor
12445 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12446 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12447 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12450 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12451 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12452 self.hv_proposed = self.hv_new = hv_new # the new actual values
12453 self.hv_inst = i_hvdict # the new dict (without defaults)
12454 else:
12455 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12456 instance.hvparams)
12457 self.hv_new = self.hv_inst = {}
12459 # beparams processing
12460 if self.op.beparams:
12461 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12462 use_none=True)
12463 objects.UpgradeBeParams(i_bedict)
12464 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12465 be_new = cluster.SimpleFillBE(i_bedict)
12466 self.be_proposed = self.be_new = be_new # the new actual values
12467 self.be_inst = i_bedict # the new dict (without defaults)
12468 else:
12469 self.be_new = self.be_inst = {}
12470 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12471 be_old = cluster.FillBE(instance)
12473 # CPU param validation -- checking every time a parameter is
12474 # changed to cover all cases where either CPU mask or vcpus have
12475 # changed
12476 if (constants.BE_VCPUS in self.be_proposed and
12477 constants.HV_CPU_MASK in self.hv_proposed):
12478 cpu_list = \
12479 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12480 # Verify mask is consistent with number of vCPUs. Can skip this
12481 # test if only 1 entry in the CPU mask, which means same mask
12482 # is applied to all vCPUs.
12483 if (len(cpu_list) > 1 and
12484 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12485 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12487 (self.be_proposed[constants.BE_VCPUS],
12488 self.hv_proposed[constants.HV_CPU_MASK]),
12489 errors.ECODE_INVAL)
12491 # Only perform this test if a new CPU mask is given
12492 if constants.HV_CPU_MASK in self.hv_new:
12493 # Calculate the largest CPU number requested
12494 max_requested_cpu = max(map(max, cpu_list))
12495 # Check that all of the instance's nodes have enough physical CPUs to
12496 # satisfy the requested CPU mask
12497 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12498 max_requested_cpu + 1, instance.hypervisor)
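# For example, a mask such as "1-2:3-4" parses into two per-vCPU entries,
# so it is only consistent with vcpus=2, and its highest CPU number (4)
# requires at least five physical CPUs on every node of the instance.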
12500 # osparams processing
12501 if self.op.osparams:
12502 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12503 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12504 self.os_inst = i_osdict # the new dict (without defaults)
12505 else:
12506 self.os_inst = {}
12508 self.warn = []
12510 #TODO(dynmem): do the appropriate check involving MINMEM
12511 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12512 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12513 mem_check_list = [pnode]
12514 if be_new[constants.BE_AUTO_BALANCE]:
12515 # either we changed auto_balance to yes or it was from before
12516 mem_check_list.extend(instance.secondary_nodes)
12517 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12518 instance.hypervisor)
12519 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12520 [instance.hypervisor])
12521 pninfo = nodeinfo[pnode]
12522 msg = pninfo.fail_msg
12523 if msg:
12524 # Assume the primary node is unreachable and go ahead
12525 self.warn.append("Can't get info from primary node %s: %s" %
12528 (_, _, (pnhvinfo, )) = pninfo.payload
12529 if not isinstance(pnhvinfo.get("memory_free", None), int):
12530 self.warn.append("Node data from primary node %s doesn't contain"
12531 " free memory information" % pnode)
12532 elif instance_info.fail_msg:
12533 self.warn.append("Can't get instance runtime information: %s" %
12534 instance_info.fail_msg)
12535 else:
12536 if instance_info.payload:
12537 current_mem = int(instance_info.payload["memory"])
12538 else:
12539 # Assume instance not running
12540 # (there is a slight race condition here, but it's not very
12541 # probable, and we have no other way to check)
12542 # TODO: Describe race condition
12543 current_mem = 0
12544 #TODO(dynmem): do the appropriate check involving MINMEM
12545 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12546 pnhvinfo["memory_free"])
12548 raise errors.OpPrereqError("This change will prevent the instance"
12549 " from starting, due to %d MB of memory"
12550 " missing on its primary node" %
12552 errors.ECODE_NORES)
12554 if be_new[constants.BE_AUTO_BALANCE]:
12555 for node, nres in nodeinfo.items():
12556 if node not in instance.secondary_nodes:
12557 continue
12558 nres.Raise("Can't get info from secondary node %s" % node,
12559 prereq=True, ecode=errors.ECODE_STATE)
12560 (_, _, (nhvinfo, )) = nres.payload
12561 if not isinstance(nhvinfo.get("memory_free", None), int):
12562 raise errors.OpPrereqError("Secondary node %s didn't return free"
12563 " memory information" % node,
12564 errors.ECODE_STATE)
12565 #TODO(dynmem): do the appropriate check involving MINMEM
12566 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12567 raise errors.OpPrereqError("This change will prevent the instance"
12568 " from failover to its secondary node"
12569 " %s, due to not enough memory" % node,
12570 errors.ECODE_STATE)
12572 if self.op.runtime_mem:
12573 remote_info = self.rpc.call_instance_info(instance.primary_node,
12574 instance.name,
12575 instance.hypervisor)
12576 remote_info.Raise("Error checking node %s" % instance.primary_node)
12577 if not remote_info.payload: # not running already
12578 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12579 errors.ECODE_STATE)
12581 current_memory = remote_info.payload["memory"]
12582 if (not self.op.force and
12583 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12584 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12585 raise errors.OpPrereqError("Instance %s must have memory between %d"
12586 " and %d MB of memory unless --force is"
12587 " given" % (instance.name,
12588 self.be_proposed[constants.BE_MINMEM],
12589 self.be_proposed[constants.BE_MAXMEM]),
12590 errors.ECODE_INVAL)
12592 if self.op.runtime_mem > current_memory:
12593 _CheckNodeFreeMemory(self, instance.primary_node,
12594 "ballooning memory for instance %s" %
12595 instance.name,
12596 self.op.runtime_mem - current_memory,
12597 instance.hypervisor)
12599 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12600 raise errors.OpPrereqError("Disk operations not supported for"
12601 " diskless instances",
12602 errors.ECODE_INVAL)
12604 def _PrepareNicCreate(_, params, private):
12605 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12606 return (None, None)
12608 def _PrepareNicMod(_, nic, params, private):
12609 self._PrepareNicModification(params, private, nic.ip,
12610 nic.nicparams, cluster, pnode)
12611 return None
12613 # Verify NIC changes (operating on copy)
12614 nics = instance.nics[:]
12615 ApplyContainerMods("NIC", nics, None, self.nicmod,
12616 _PrepareNicCreate, _PrepareNicMod, None)
12617 if len(nics) > constants.MAX_NICS:
12618 raise errors.OpPrereqError("Instance has too many network interfaces"
12619 " (%d), cannot add more" % constants.MAX_NICS,
12620 errors.ECODE_STATE)
12622 # Verify disk changes (operating on a copy)
12623 disks = instance.disks[:]
12624 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12625 if len(disks) > constants.MAX_DISKS:
12626 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12627 " more" % constants.MAX_DISKS,
12628 errors.ECODE_STATE)
12630 if self.op.offline is not None:
12631 if self.op.offline:
12632 msg = "can't change to offline"
12634 msg = "can't change to online"
12635 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12637 # Pre-compute NIC changes (necessary to use result in hooks)
12638 self._nic_chgdesc = []
12639 if self.nicmod:
12640 # Operate on copies as this is still in prereq
12641 nics = [nic.Copy() for nic in instance.nics]
12642 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12643 self._CreateNewNic, self._ApplyNicMods, None)
12644 self._new_nics = nics
12645 else:
12646 self._new_nics = None
12648 def _ConvertPlainToDrbd(self, feedback_fn):
12649 """Converts an instance from plain to drbd.
12652 feedback_fn("Converting template to drbd")
12653 instance = self.instance
12654 pnode = instance.primary_node
12655 snode = self.op.remote_node
12657 assert instance.disk_template == constants.DT_PLAIN
12659 # create a fake disk info for _GenerateDiskTemplate
12660 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12661 constants.IDISK_VG: d.logical_id[0]}
12662 for d in instance.disks]
12663 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12664 instance.name, pnode, [snode],
12665 disk_info, None, None, 0, feedback_fn,
12666 self.diskparams)
12667 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12668 self.diskparams)
12669 info = _GetInstanceInfoText(instance)
12670 feedback_fn("Creating additional volumes...")
12671 # first, create the missing data and meta devices
12672 for disk in anno_disks:
12673 # unfortunately this is... not too nice
12674 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12675 info, True)
12676 for child in disk.children:
12677 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12678 # at this stage, all new LVs have been created, we can rename the
12679 # old ones
12680 feedback_fn("Renaming original volumes...")
12681 rename_list = [(o, n.children[0].logical_id)
12682 for (o, n) in zip(instance.disks, new_disks)]
12683 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12684 result.Raise("Failed to rename original LVs")
12686 feedback_fn("Initializing DRBD devices...")
12687 # all child devices are in place, we can now create the DRBD devices
12688 for disk in anno_disks:
12689 for node in [pnode, snode]:
12690 f_create = node == pnode
12691 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12693 # at this point, the instance has been modified
12694 instance.disk_template = constants.DT_DRBD8
12695 instance.disks = new_disks
12696 self.cfg.Update(instance, feedback_fn)
12698 # Release node locks while waiting for sync
12699 _ReleaseLocks(self, locking.LEVEL_NODE)
12701 # disks are created, waiting for sync
12702 disk_abort = not _WaitForSync(self, instance,
12703 oneshot=not self.op.wait_for_sync)
12704 if disk_abort:
12705 raise errors.OpExecError("There are some degraded disks for"
12706 " this instance, please cleanup manually")
12708 # Node resource locks will be released by caller
12710 def _ConvertDrbdToPlain(self, feedback_fn):
12711 """Converts an instance from drbd to plain.
12714 instance = self.instance
12716 assert len(instance.secondary_nodes) == 1
12717 assert instance.disk_template == constants.DT_DRBD8
12719 pnode = instance.primary_node
12720 snode = instance.secondary_nodes[0]
12721 feedback_fn("Converting template to plain")
12723 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12724 new_disks = [d.children[0] for d in instance.disks]
12726 # copy over size and mode
12727 for parent, child in zip(old_disks, new_disks):
12728 child.size = parent.size
12729 child.mode = parent.mode
12731 # this is a DRBD disk, return its port to the pool
12732 # NOTE: this must be done right before the call to cfg.Update!
12733 for disk in old_disks:
12734 tcp_port = disk.logical_id[2]
12735 self.cfg.AddTcpUdpPort(tcp_port)
12737 # update instance structure
12738 instance.disks = new_disks
12739 instance.disk_template = constants.DT_PLAIN
12740 self.cfg.Update(instance, feedback_fn)
12742 # Release locks in case removing disks takes a while
12743 _ReleaseLocks(self, locking.LEVEL_NODE)
12745 feedback_fn("Removing volumes on the secondary node...")
12746 for disk in old_disks:
12747 self.cfg.SetDiskID(disk, snode)
12748 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12749 if msg:
12750 self.LogWarning("Could not remove block device %s on node %s,"
12751 " continuing anyway: %s", disk.iv_name, snode, msg)
12753 feedback_fn("Removing unneeded volumes on the primary node...")
12754 for idx, disk in enumerate(old_disks):
12755 meta = disk.children[1]
12756 self.cfg.SetDiskID(meta, pnode)
12757 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12758 if msg:
12759 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12760 " continuing anyway: %s", idx, pnode, msg)
12762 def _CreateNewDisk(self, idx, params, _):
12763 """Creates a new disk.
12766 instance = self.instance
12769 if instance.disk_template in constants.DTS_FILEBASED:
12770 (file_driver, file_path) = instance.disks[0].logical_id
12771 file_path = os.path.dirname(file_path)
12772 else:
12773 file_driver = file_path = None
12775 disk = \
12776 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12777 instance.primary_node, instance.secondary_nodes,
12778 [params], file_path, file_driver, idx,
12779 self.Log, self.diskparams)[0]
12781 info = _GetInstanceInfoText(instance)
12783 logging.info("Creating volume %s for instance %s",
12784 disk.iv_name, instance.name)
12785 # Note: this needs to be kept in sync with _CreateDisks
12787 for node in instance.all_nodes:
12788 f_create = (node == instance.primary_node)
12789 try:
12790 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12791 except errors.OpExecError, err:
12792 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12793 disk.iv_name, disk, node, err)
12796 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12799 @staticmethod
12800 def _ModifyDisk(idx, disk, params, _):
12801 """Modifies a disk.
12804 disk.mode = params[constants.IDISK_MODE]
12806 return [
12807 ("disk.mode/%d" % idx, disk.mode),
12808 ]
12810 def _RemoveDisk(self, idx, root, _):
12811 """Removes a disk.
12813 """
12814 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12815 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12816 self.cfg.SetDiskID(disk, node)
12817 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12818 if msg:
12819 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12820 " continuing anyway", idx, node, msg)
12822 # if this is a DRBD disk, return its port to the pool
12823 if root.dev_type in constants.LDS_DRBD:
12824 self.cfg.AddTcpUdpPort(root.logical_id[2])
12826 @staticmethod
12827 def _CreateNewNic(idx, params, private):
12828 """Creates data structure for a new network interface.
12831 mac = params[constants.INIC_MAC]
12832 ip = params.get(constants.INIC_IP, None)
12833 nicparams = private.params
12835 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12836 ("nic.%d" % idx,
12837 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12838 (mac, ip, private.filled[constants.NIC_MODE],
12839 private.filled[constants.NIC_LINK])),
12840 ])
12842 @staticmethod
12843 def _ApplyNicMods(idx, nic, params, private):
12844 """Modifies a network interface.
12849 for key in [constants.INIC_MAC, constants.INIC_IP]:
12851 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12852 setattr(nic, key, params[key])
12855 nic.nicparams = private.params
12857 for (key, val) in params.items():
12858 changes.append(("nic.%s/%d" % (key, idx), val))
12862 def Exec(self, feedback_fn):
12863 """Modifies an instance.
12865 All parameters take effect only at the next restart of the instance.
12868 # Process the warnings from CheckPrereq here, as we don't have a
12869 # feedback_fn there.
12870 # TODO: Replace with self.LogWarning
12871 for warn in self.warn:
12872 feedback_fn("WARNING: %s" % warn)
12874 assert ((self.op.disk_template is None) ^
12875 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12876 "Not owning any node resource locks"
12878 result = []
12879 instance = self.instance
12882 if self.op.runtime_mem:
12883 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12884 instance,
12885 self.op.runtime_mem)
12886 rpcres.Raise("Cannot modify instance runtime memory")
12887 result.append(("runtime_memory", self.op.runtime_mem))
12889 # Apply disk changes
12890 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12891 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12892 _UpdateIvNames(0, instance.disks)
12894 if self.op.disk_template:
12896 check_nodes = set(instance.all_nodes)
12897 if self.op.remote_node:
12898 check_nodes.add(self.op.remote_node)
12899 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12900 owned = self.owned_locks(level)
12901 assert not (check_nodes - owned), \
12902 ("Not owning the correct locks, owning %r, expected at least %r" %
12903 (owned, check_nodes))
12905 r_shut = _ShutdownInstanceDisks(self, instance)
12906 if not r_shut:
12907 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12908 " proceed with disk template conversion")
12909 mode = (instance.disk_template, self.op.disk_template)
12910 try:
12911 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12912 except:
12913 self.cfg.ReleaseDRBDMinors(instance.name)
12914 raise
12915 result.append(("disk_template", self.op.disk_template))
12917 assert instance.disk_template == self.op.disk_template, \
12918 ("Expected disk template '%s', found '%s'" %
12919 (self.op.disk_template, instance.disk_template))
12921 # Release node and resource locks if there are any (they might already have
12922 # been released during disk conversion)
12923 _ReleaseLocks(self, locking.LEVEL_NODE)
12924 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12926 # Apply NIC changes
12927 if self._new_nics is not None:
12928 instance.nics = self._new_nics
12929 result.extend(self._nic_chgdesc)
12932 if self.op.hvparams:
12933 instance.hvparams = self.hv_inst
12934 for key, val in self.op.hvparams.iteritems():
12935 result.append(("hv/%s" % key, val))
12938 if self.op.beparams:
12939 instance.beparams = self.be_inst
12940 for key, val in self.op.beparams.iteritems():
12941 result.append(("be/%s" % key, val))
12944 if self.op.os_name:
12945 instance.os = self.op.os_name
12948 if self.op.osparams:
12949 instance.osparams = self.os_inst
12950 for key, val in self.op.osparams.iteritems():
12951 result.append(("os/%s" % key, val))
12953 if self.op.offline is None:
12954 # Ignore
12955 pass
12956 elif self.op.offline:
12957 # Mark instance as offline
12958 self.cfg.MarkInstanceOffline(instance.name)
12959 result.append(("admin_state", constants.ADMINST_OFFLINE))
12960 else:
12961 # Mark instance as online, but stopped
12962 self.cfg.MarkInstanceDown(instance.name)
12963 result.append(("admin_state", constants.ADMINST_DOWN))
12965 self.cfg.Update(instance, feedback_fn)
12967 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12968 self.owned_locks(locking.LEVEL_NODE)), \
12969 "All node locks should have been released by now"
12973 _DISK_CONVERSIONS = {
12974 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12975 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12976 }
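# A stand-alone sketch of the dispatch done in Exec above: the conversion
# methods are looked up in _DISK_CONVERSIONS by (current, requested) template
# pair; unsupported pairs simply have no entry. The converter callables here
# are hypothetical.
def _ExampleDispatchConversion(conversions, current, requested, lu,
                               feedback_fn):
  """Sketch: looks up and runs a disk template converter.

  """
  mode = (current, requested)
  if mode not in conversions:
    raise errors.OpExecError("Unsupported disk template conversion %r" %
                             (mode, ))
  conversions[mode](lu, feedback_fn)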
12979 class LUInstanceChangeGroup(LogicalUnit):
12980 HPATH = "instance-change-group"
12981 HTYPE = constants.HTYPE_INSTANCE
12982 REQ_BGL = False
12984 def ExpandNames(self):
12985 self.share_locks = _ShareAll()
12986 self.needed_locks = {
12987 locking.LEVEL_NODEGROUP: [],
12988 locking.LEVEL_NODE: [],
12989 }
12991 self._ExpandAndLockInstance()
12993 if self.op.target_groups:
12994 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12995 self.op.target_groups)
12996 else:
12997 self.req_target_uuids = None
12999 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13001 def DeclareLocks(self, level):
13002 if level == locking.LEVEL_NODEGROUP:
13003 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13005 if self.req_target_uuids:
13006 lock_groups = set(self.req_target_uuids)
13008 # Lock all groups used by instance optimistically; this requires going
13009 # via the node before it's locked, requiring verification later on
13010 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13011 lock_groups.update(instance_groups)
13012 else:
13013 # No target groups, need to lock all of them
13014 lock_groups = locking.ALL_SET
13016 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13018 elif level == locking.LEVEL_NODE:
13019 if self.req_target_uuids:
13020 # Lock all nodes used by instances
13021 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13022 self._LockInstancesNodes()
13024 # Lock all nodes in all potential target groups
13025 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13026 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13027 member_nodes = [node_name
13028 for group in lock_groups
13029 for node_name in self.cfg.GetNodeGroup(group).members]
13030 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13031 else:
13032 # Lock all nodes as all groups are potential targets
13033 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13035 def CheckPrereq(self):
13036 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13037 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13038 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13040 assert (self.req_target_uuids is None or
13041 owned_groups.issuperset(self.req_target_uuids))
13042 assert owned_instances == set([self.op.instance_name])
13044 # Get instance information
13045 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13047 # Check if node groups for locked instance are still correct
13048 assert owned_nodes.issuperset(self.instance.all_nodes), \
13049 ("Instance %s's nodes changed while we kept the lock" %
13050 self.op.instance_name)
13052 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13053 owned_groups)
13055 if self.req_target_uuids:
13056 # User requested specific target groups
13057 self.target_uuids = frozenset(self.req_target_uuids)
13058 else:
13059 # All groups except those used by the instance are potential targets
13060 self.target_uuids = owned_groups - inst_groups
13062 conflicting_groups = self.target_uuids & inst_groups
13063 if conflicting_groups:
13064 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13065 " used by the instance '%s'" %
13066 (utils.CommaJoin(conflicting_groups),
13067 self.op.instance_name),
13068 errors.ECODE_INVAL)
13070 if not self.target_uuids:
13071 raise errors.OpPrereqError("There are no possible target groups",
13072 errors.ECODE_INVAL)
13074 def BuildHooksEnv(self):
13075 """Build hooks env.
13078 assert self.target_uuids
13080 env = {
13081 "TARGET_GROUPS": " ".join(self.target_uuids),
13082 }
13084 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13086 return env
13088 def BuildHooksNodes(self):
13089 """Build hooks nodes.
13092 mn = self.cfg.GetMasterNode()
13093 return ([mn], [mn])
13095 def Exec(self, feedback_fn):
13096 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13098 assert instances == [self.op.instance_name], "Instance not locked"
13100 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13101 instances=instances, target_groups=list(self.target_uuids))
13103 ial.Run(self.op.iallocator)
13105 if not ial.success:
13106 raise errors.OpPrereqError("Can't compute solution for changing group of"
13107 " instance '%s' using iallocator '%s': %s" %
13108 (self.op.instance_name, self.op.iallocator,
13109 ial.info),
13110 errors.ECODE_NORES)
13112 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13114 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13115 " instance '%s'", len(jobs), self.op.instance_name)
13117 return ResultWithJobs(jobs)
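# A sketch of the shape handed to ResultWithJobs above: a list of lists of
# opcodes, where each inner list is submitted as one job by the processor.
# The per-instance migration opcode used here is only an illustration.
def _ExampleJobsForResult(instance_names):
  """Sketch: builds one single-opcode job per instance name.

  """
  jobs = [[opcodes.OpInstanceMigrate(instance_name=name)]
          for name in instance_names]
  return ResultWithJobs(jobs)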
13120 class LUBackupQuery(NoHooksLU):
13121 """Query the exports list
13126 def CheckArguments(self):
13127 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13128 ["node", "export"], self.op.use_locking)
13130 def ExpandNames(self):
13131 self.expq.ExpandNames(self)
13133 def DeclareLocks(self, level):
13134 self.expq.DeclareLocks(self, level)
13136 def Exec(self, feedback_fn):
13137 result = {}
13139 for (node, expname) in self.expq.OldStyleQuery(self):
13140 if expname is None:
13141 result[node] = False
13142 else:
13143 result.setdefault(node, []).append(expname)
13145 return result
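# A sketch of how a caller might consume the old-style result built above:
# failed nodes are marked with False instead of a list of export names, so
# the two cases have to be told apart. Hypothetical helper.
def _ExampleSummarizeExports(query_result):
  """Sketch: splits the node->exports mapping into failures and exports.

  """
  (failed, exports) = ([], {})
  for (node, val) in query_result.items():
    if val is False:
      failed.append(node)
    else:
      exports[node] = val
  return (failed, exports)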
13148 class _ExportQuery(_QueryBase):
13149 FIELDS = query.EXPORT_FIELDS
13151 #: The node name is not a unique key for this query
13152 SORT_FIELD = "node"
13154 def ExpandNames(self, lu):
13155 lu.needed_locks = {}
13157 # The following variables interact with _QueryBase._GetNames
13158 if self.names:
13159 self.wanted = _GetWantedNodes(lu, self.names)
13160 else:
13161 self.wanted = locking.ALL_SET
13163 self.do_locking = self.use_locking
13165 if self.do_locking:
13166 lu.share_locks = _ShareAll()
13167 lu.needed_locks = {
13168 locking.LEVEL_NODE: self.wanted,
13169 }
13171 def DeclareLocks(self, lu, level):
13172 pass
13174 def _GetQueryData(self, lu):
13175 """Computes the list of nodes and their attributes.
13178 # Locking is not used
13180 assert not (compat.any(lu.glm.is_owned(level)
13181 for level in locking.LEVELS
13182 if level != locking.LEVEL_CLUSTER) or
13183 self.do_locking or self.use_locking)
13185 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13187 result = []
13189 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13190 if nres.fail_msg:
13191 result.append((node, None))
13192 else:
13193 result.extend((node, expname) for expname in nres.payload)
13195 return result
13198 class LUBackupPrepare(NoHooksLU):
13199 """Prepares an instance for an export and returns useful information.
13204 def ExpandNames(self):
13205 self._ExpandAndLockInstance()
13207 def CheckPrereq(self):
13208 """Check prerequisites.
13211 instance_name = self.op.instance_name
13213 self.instance = self.cfg.GetInstanceInfo(instance_name)
13214 assert self.instance is not None, \
13215 "Cannot retrieve locked instance %s" % self.op.instance_name
13216 _CheckNodeOnline(self, self.instance.primary_node)
13218 self._cds = _GetClusterDomainSecret()
13220 def Exec(self, feedback_fn):
13221 """Prepares an instance for an export.
13224 instance = self.instance
13226 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13227 salt = utils.GenerateSecret(8)
13229 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13230 result = self.rpc.call_x509_cert_create(instance.primary_node,
13231 constants.RIE_CERT_VALIDITY)
13232 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13234 (name, cert_pem) = result.payload
13236 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13237 cert_pem)
13239 return {
13240 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13241 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13242 salt),
13243 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13244 }
13246 return None
13249 class LUBackupExport(LogicalUnit):
13250 """Export an instance to an image in the cluster.
13253 HPATH = "instance-export"
13254 HTYPE = constants.HTYPE_INSTANCE
13255 REQ_BGL = False
13257 def CheckArguments(self):
13258 """Check the arguments.
13261 self.x509_key_name = self.op.x509_key_name
13262 self.dest_x509_ca_pem = self.op.destination_x509_ca
13264 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13265 if not self.x509_key_name:
13266 raise errors.OpPrereqError("Missing X509 key name for encryption",
13267 errors.ECODE_INVAL)
13269 if not self.dest_x509_ca_pem:
13270 raise errors.OpPrereqError("Missing destination X509 CA",
13271 errors.ECODE_INVAL)
13273 def ExpandNames(self):
13274 self._ExpandAndLockInstance()
13276 # Lock all nodes for local exports
13277 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13278 # FIXME: lock only instance primary and destination node
13280 # Sad but true, for now we have to lock all nodes, as we don't know where
13281 # the previous export might be, and in this LU we search for it and
13282 # remove it from its current node. In the future we could fix this by:
13283 # - making a tasklet to search (share-lock all), then create the
13284 # new one, then one to remove, after
13285 # - removing the removal operation altogether
13286 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13288 def DeclareLocks(self, level):
13289 """Last minute lock declaration."""
13290 # All nodes are locked anyway, so nothing to do here.
13292 def BuildHooksEnv(self):
13293 """Build hooks env.
13295 This will run on the master, primary node and target node.
13299 "EXPORT_MODE": self.op.mode,
13300 "EXPORT_NODE": self.op.target_node,
13301 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13302 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13303 # TODO: Generic function for boolean env variables
13304 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13307 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13309 return env
13311 def BuildHooksNodes(self):
13312 """Build hooks nodes.
13315 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13317 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13318 nl.append(self.op.target_node)
13320 return (nl, nl)
13322 def CheckPrereq(self):
13323 """Check prerequisites.
13325 This checks that the instance and node names are valid.
13328 instance_name = self.op.instance_name
13330 self.instance = self.cfg.GetInstanceInfo(instance_name)
13331 assert self.instance is not None, \
13332 "Cannot retrieve locked instance %s" % self.op.instance_name
13333 _CheckNodeOnline(self, self.instance.primary_node)
13335 if (self.op.remove_instance and
13336 self.instance.admin_state == constants.ADMINST_UP and
13337 not self.op.shutdown):
13338 raise errors.OpPrereqError("Can not remove instance without shutting it"
13341 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13342 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13343 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13344 assert self.dst_node is not None
13346 _CheckNodeOnline(self, self.dst_node.name)
13347 _CheckNodeNotDrained(self, self.dst_node.name)
13350 self.dest_disk_info = None
13351 self.dest_x509_ca = None
13353 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13354 self.dst_node = None
13356 if len(self.op.target_node) != len(self.instance.disks):
13357 raise errors.OpPrereqError(("Received destination information for %s"
13358 " disks, but instance %s has %s disks") %
13359 (len(self.op.target_node), instance_name,
13360 len(self.instance.disks)),
13361 errors.ECODE_INVAL)
13363 cds = _GetClusterDomainSecret()
13365 # Check X509 key name
13367 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13368 except (TypeError, ValueError), err:
13369 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13371 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13372 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13373 errors.ECODE_INVAL)
13375 # Load and verify CA
13376 try:
13377 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13378 except OpenSSL.crypto.Error, err:
13379 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13380 (err, ), errors.ECODE_INVAL)
13382 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13383 if errcode is not None:
13384 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13385 (msg, ), errors.ECODE_INVAL)
13387 self.dest_x509_ca = cert
13389 # Verify target information
13390 disk_info = []
13391 for idx, disk_data in enumerate(self.op.target_node):
13392 try:
13393 (host, port, magic) = \
13394 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13395 except errors.GenericError, err:
13396 raise errors.OpPrereqError("Target info for disk %s: %s" %
13397 (idx, err), errors.ECODE_INVAL)
13399 disk_info.append((host, port, magic))
13401 assert len(disk_info) == len(self.op.target_node)
13402 self.dest_disk_info = disk_info
13404 else:
13405 raise errors.ProgrammerError("Unhandled export mode %r" %
13406 self.op.mode)
13408 # instance disk type verification
13409 # TODO: Implement export support for file-based disks
13410 for disk in self.instance.disks:
13411 if disk.dev_type == constants.LD_FILE:
13412 raise errors.OpPrereqError("Export not supported for instances with"
13413 " file-based disks", errors.ECODE_INVAL)
13415 def _CleanupExports(self, feedback_fn):
13416 """Removes exports of current instance from all other nodes.
13418 If an instance in a cluster with nodes A..D was exported to node C, its
13419 exports will be removed from the nodes A, B and D.
13422 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13424 nodelist = self.cfg.GetNodeList()
13425 nodelist.remove(self.dst_node.name)
13427 # on one-node clusters nodelist will be empty after the removal; if we
13428 # proceeded, the backup would be removed because OpBackupQuery
13429 # substitutes an empty list with the full cluster node list.
13430 iname = self.instance.name
13431 if nodelist:
13432 feedback_fn("Removing old exports for instance %s" % iname)
13433 exportlist = self.rpc.call_export_list(nodelist)
13434 for node in exportlist:
13435 if exportlist[node].fail_msg:
13436 continue
13437 if iname in exportlist[node].payload:
13438 msg = self.rpc.call_export_remove(node, iname).fail_msg
13439 if msg:
13440 self.LogWarning("Could not remove older export for instance %s"
13441 " on node %s: %s", iname, node, msg)
13443 def Exec(self, feedback_fn):
13444 """Export an instance to an image in the cluster.
13447 assert self.op.mode in constants.EXPORT_MODES
13449 instance = self.instance
13450 src_node = instance.primary_node
13452 if self.op.shutdown:
13453 # shutdown the instance, but not the disks
13454 feedback_fn("Shutting down instance %s" % instance.name)
13455 result = self.rpc.call_instance_shutdown(src_node, instance,
13456 self.op.shutdown_timeout)
13457 # TODO: Maybe ignore failures if ignore_remove_failures is set
13458 result.Raise("Could not shutdown instance %s on"
13459 " node %s" % (instance.name, src_node))
13461 # set the disks ID correctly since call_instance_start needs the
13462 # correct drbd minor to create the symlinks
13463 for disk in instance.disks:
13464 self.cfg.SetDiskID(disk, src_node)
13466 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13468 if activate_disks:
13469 # Activate the instance disks if we're exporting a stopped instance
13470 feedback_fn("Activating disks for %s" % instance.name)
13471 _StartInstanceDisks(self, instance, None)
13473 try:
13474 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13475 instance)
13477 helper.CreateSnapshots()
13478 try:
13479 if (self.op.shutdown and
13480 instance.admin_state == constants.ADMINST_UP and
13481 not self.op.remove_instance):
13482 assert not activate_disks
13483 feedback_fn("Starting instance %s" % instance.name)
13484 result = self.rpc.call_instance_start(src_node,
13485 (instance, None, None), False)
13486 msg = result.fail_msg
13487 if msg:
13488 feedback_fn("Failed to start instance: %s" % msg)
13489 _ShutdownInstanceDisks(self, instance)
13490 raise errors.OpExecError("Could not start instance: %s" % msg)
13492 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13493 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13494 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13495 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13496 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13498 (key_name, _, _) = self.x509_key_name
13500 dest_ca_pem = \
13501 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13502 self.dest_x509_ca)
13504 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13505 key_name, dest_ca_pem,
13506 timeouts)
13507 finally:
13508 helper.Cleanup()
13510 # Check for backwards compatibility
13511 assert len(dresults) == len(instance.disks)
13512 assert compat.all(isinstance(i, bool) for i in dresults), \
13513 "Not all results are boolean: %r" % dresults
13517 feedback_fn("Deactivating disks for %s" % instance.name)
13518 _ShutdownInstanceDisks(self, instance)
13520 if not (compat.all(dresults) and fin_resu):
13521 failures = []
13522 if not fin_resu:
13523 failures.append("export finalization")
13524 if not compat.all(dresults):
13525 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13526 if not dsk)
13527 failures.append("disk export: disk(s) %s" % fdsk)
13529 raise errors.OpExecError("Export failed, errors in %s" %
13530 utils.CommaJoin(failures))
13532 # At this point, the export was successful, we can cleanup/finish
13534 # Remove instance if requested
13535 if self.op.remove_instance:
13536 feedback_fn("Removing instance %s" % instance.name)
13537 _RemoveInstance(self, feedback_fn, instance,
13538 self.op.ignore_remove_failures)
13540 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13541 self._CleanupExports(feedback_fn)
13543 return fin_resu, dresults
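# A sketch of how the (fin_resu, dresults) pair returned above is meant to
# be read: fin_resu covers export finalization, dresults holds one boolean
# per instance disk. Hypothetical helper.
def _ExampleExportSucceeded(fin_resu, dresults):
  """Sketch: an export only succeeded if finalization and all disks did.

  """
  return bool(fin_resu) and compat.all(dresults)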
13546 class LUBackupRemove(NoHooksLU):
13547 """Remove exports related to the named instance.
13552 def ExpandNames(self):
13553 self.needed_locks = {}
13554 # We need all nodes to be locked in order for RemoveExport to work, but we
13555 # don't need to lock the instance itself, as nothing will happen to it (and
13556 # we can remove exports also for a removed instance)
13557 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13559 def Exec(self, feedback_fn):
13560 """Remove any export.
13563 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13564 # If the instance was not found we'll try with the name that was passed in.
13565 # This will only work if it was an FQDN, though.
13566 fqdn_warn = False
13567 if not instance_name:
13568 fqdn_warn = True
13569 instance_name = self.op.instance_name
13571 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13572 exportlist = self.rpc.call_export_list(locked_nodes)
13573 found = False
13574 for node in exportlist:
13575 msg = exportlist[node].fail_msg
13576 if msg:
13577 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13578 continue
13579 if instance_name in exportlist[node].payload:
13580 found = True
13581 result = self.rpc.call_export_remove(node, instance_name)
13582 msg = result.fail_msg
13583 if msg:
13584 logging.error("Could not remove export for instance %s"
13585 " on node %s: %s", instance_name, node, msg)
13587 if fqdn_warn and not found:
13588 feedback_fn("Export not found. If trying to remove an export belonging"
13589 " to a deleted instance please use its Fully Qualified"
13593 class LUGroupAdd(LogicalUnit):
13594 """Logical unit for creating node groups.
13597 HPATH = "group-add"
13598 HTYPE = constants.HTYPE_GROUP
13599 REQ_BGL = False
13601 def ExpandNames(self):
13602 # We need the new group's UUID here so that we can create and acquire the
13603 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13604 # that it should not check whether the UUID exists in the configuration.
13605 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13606 self.needed_locks = {}
13607 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13609 def CheckPrereq(self):
13610 """Check prerequisites.
13612 This checks that the given group name is not an existing node group
13613 already.
13615 """
13616 try:
13617 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13618 except errors.OpPrereqError:
13619 pass
13620 else:
13621 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13622 " node group (UUID: %s)" %
13623 (self.op.group_name, existing_uuid),
13624 errors.ECODE_EXISTS)
13626 if self.op.ndparams:
13627 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13629 if self.op.hv_state:
13630 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13631 else:
13632 self.new_hv_state = None
13634 if self.op.disk_state:
13635 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13636 else:
13637 self.new_disk_state = None
13639 if self.op.diskparams:
13640 for templ in constants.DISK_TEMPLATES:
13641 if templ in self.op.diskparams:
13642 utils.ForceDictType(self.op.diskparams[templ],
13643 constants.DISK_DT_TYPES)
13644 self.new_diskparams = self.op.diskparams
13645 try:
13646 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13647 except errors.OpPrereqError, err:
13648 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
13649 errors.ECODE_INVAL)
13650 else:
13651 self.new_diskparams = {}
13653 if self.op.ipolicy:
13654 cluster = self.cfg.GetClusterInfo()
13655 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13656 try:
13657 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13658 except errors.ConfigurationError, err:
13659 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13660 errors.ECODE_INVAL)
13662 def BuildHooksEnv(self):
13663 """Build hooks env.
13667 "GROUP_NAME": self.op.group_name,
13670 def BuildHooksNodes(self):
13671 """Build hooks nodes.
13674 mn = self.cfg.GetMasterNode()
13675 return ([mn], [mn])
13677 def Exec(self, feedback_fn):
13678 """Add the node group to the cluster.
13681 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13682 uuid=self.group_uuid,
13683 alloc_policy=self.op.alloc_policy,
13684 ndparams=self.op.ndparams,
13685 diskparams=self.new_diskparams,
13686 ipolicy=self.op.ipolicy,
13687 hv_state_static=self.new_hv_state,
13688 disk_state_static=self.new_disk_state)
13690 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13691 del self.remove_locks[locking.LEVEL_NODEGROUP]
13694 class LUGroupAssignNodes(NoHooksLU):
13695 """Logical unit for assigning nodes to groups.
13700 def ExpandNames(self):
13701 # These raise errors.OpPrereqError on their own:
13702 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13703 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13705 # We want to lock all the affected nodes and groups. We have readily
13706 # available the list of nodes, and the *destination* group. To gather the
13707 # list of "source" groups, we need to fetch node information later on.
13708 self.needed_locks = {
13709 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13710 locking.LEVEL_NODE: self.op.nodes,
13711 }
13713 def DeclareLocks(self, level):
13714 if level == locking.LEVEL_NODEGROUP:
13715 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13717 # Try to get all affected nodes' groups without having the group or node
13718 # lock yet. Needs verification later in the code flow.
13719 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13721 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13723 def CheckPrereq(self):
13724 """Check prerequisites.
13727 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13728 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13729 frozenset(self.op.nodes))
13731 expected_locks = (set([self.group_uuid]) |
13732 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13733 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13734 if actual_locks != expected_locks:
13735 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13736 " current groups are '%s', used to be '%s'" %
13737 (utils.CommaJoin(expected_locks),
13738 utils.CommaJoin(actual_locks)))
13740 self.node_data = self.cfg.GetAllNodesInfo()
13741 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13742 instance_data = self.cfg.GetAllInstancesInfo()
13744 if self.group is None:
13745 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13746 (self.op.group_name, self.group_uuid))
13748 (new_splits, previous_splits) = \
13749 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13750 for node in self.op.nodes],
13751 self.node_data, instance_data)
13753 if new_splits:
13754 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13756 if not self.op.force:
13757 raise errors.OpExecError("The following instances get split by this"
13758 " change and --force was not given: %s" %
13759 fmt_new_splits)
13760 else:
13761 self.LogWarning("This operation will split the following instances: %s",
13762 fmt_new_splits)
13764 if previous_splits:
13765 self.LogWarning("In addition, these already-split instances continue"
13766 " to be split across groups: %s",
13767 utils.CommaJoin(utils.NiceSort(previous_splits)))
13769 def Exec(self, feedback_fn):
13770 """Assign nodes to a new group.
13773 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13775 self.cfg.AssignGroupNodes(mods)
13777 @staticmethod
13778 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13779 """Check for split instances after a node assignment.
13781 This method considers a series of node assignments as an atomic operation,
13782 and returns information about split instances after applying the set of
13785 In particular, it returns information about newly split instances, and
13786 instances that were already split, and remain so after the change.
13788 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13789 considered.
13791 @type changes: list of (node_name, new_group_uuid) pairs.
13792 @param changes: list of node assignments to consider.
13793 @param node_data: a dict with data for all nodes
13794 @param instance_data: a dict with all instances to consider
13795 @rtype: a two-tuple
13796 @return: a list of instances that were previously okay and become split as a
13797 consequence of this change, and a list of instances that were previously
13798 split and that this change does not fix.
13801 changed_nodes = dict((node, group) for node, group in changes
13802 if node_data[node].group != group)
13804 all_split_instances = set()
13805 previously_split_instances = set()
13807 def InstanceNodes(instance):
13808 return [instance.primary_node] + list(instance.secondary_nodes)
13810 for inst in instance_data.values():
13811 if inst.disk_template not in constants.DTS_INT_MIRROR:
13812 continue
13814 instance_nodes = InstanceNodes(inst)
13816 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13817 previously_split_instances.add(inst.name)
13819 if len(set(changed_nodes.get(node, node_data[node].group)
13820 for node in instance_nodes)) > 1:
13821 all_split_instances.add(inst.name)
13823 return (list(all_split_instances - previously_split_instances),
13824 list(previously_split_instances & all_split_instances))
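# A worked example for CheckAssignmentForSplitInstances, using hypothetical
# stand-ins for node and instance objects: with inst1 on (n1, n2) and n2
# moving to group "g2", inst1 is reported as newly split.
def _ExampleSplitCheck():
  """Sketch: returns (["inst1"], []) for the scenario described above.

  """
  class _FakeNode:
    def __init__(self, group):
      self.group = group

  class _FakeInstance:
    def __init__(self, name, pnode, snodes):
      self.name = name
      self.primary_node = pnode
      self.secondary_nodes = snodes
      self.disk_template = constants.DT_DRBD8

  node_data = {"n1": _FakeNode("g1"), "n2": _FakeNode("g1")}
  instance_data = {"inst1": _FakeInstance("inst1", "n1", ["n2"])}
  return LUGroupAssignNodes.CheckAssignmentForSplitInstances(
    [("n2", "g2")], node_data, instance_data)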
13827 class _GroupQuery(_QueryBase):
13828 FIELDS = query.GROUP_FIELDS
13830 def ExpandNames(self, lu):
13831 lu.needed_locks = {}
13833 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13834 self._cluster = lu.cfg.GetClusterInfo()
13835 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13837 if not self.names:
13838 self.wanted = [name_to_uuid[name]
13839 for name in utils.NiceSort(name_to_uuid.keys())]
13840 else:
13841 # Accept names to be either names or UUIDs.
13842 missing = []
13843 self.wanted = []
13844 all_uuid = frozenset(self._all_groups.keys())
13846 for name in self.names:
13847 if name in all_uuid:
13848 self.wanted.append(name)
13849 elif name in name_to_uuid:
13850 self.wanted.append(name_to_uuid[name])
13852 missing.append(name)
13854 if missing:
13855 raise errors.OpPrereqError("Some groups do not exist: %s" %
13856 utils.CommaJoin(missing),
13857 errors.ECODE_NOENT)
13859 def DeclareLocks(self, lu, level):
13860 pass
13862 def _GetQueryData(self, lu):
13863 """Computes the list of node groups and their attributes.
13866 do_nodes = query.GQ_NODE in self.requested_data
13867 do_instances = query.GQ_INST in self.requested_data
13869 group_to_nodes = None
13870 group_to_instances = None
13872 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13873 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13874 # latter GetAllInstancesInfo() is not enough, for we have to go through
13875 # instance->node. Hence, we will need to process nodes even if we only need
13876 # instance information.
13877 if do_nodes or do_instances:
13878 all_nodes = lu.cfg.GetAllNodesInfo()
13879 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13880 node_to_group = {}
13882 for node in all_nodes.values():
13883 if node.group in group_to_nodes:
13884 group_to_nodes[node.group].append(node.name)
13885 node_to_group[node.name] = node.group
13887 if do_instances:
13888 all_instances = lu.cfg.GetAllInstancesInfo()
13889 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13891 for instance in all_instances.values():
13892 node = instance.primary_node
13893 if node in node_to_group:
13894 group_to_instances[node_to_group[node]].append(instance.name)
13896 if not do_nodes:
13897 # Do not pass on node information if it was not requested.
13898 group_to_nodes = None
13900 return query.GroupQueryData(self._cluster,
13901 [self._all_groups[uuid]
13902 for uuid in self.wanted],
13903 group_to_nodes, group_to_instances,
13904 query.GQ_DISKPARAMS in self.requested_data)
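# A condensed sketch of the two-step mapping built in _GetQueryData above:
# nodes are walked first because instances only record their primary node,
# not their group. Hypothetical helper over the same data shapes.
def _ExampleGroupMappings(all_nodes, all_instances, wanted):
  """Sketch: builds group->nodes and group->instances for wanted groups.

  """
  group_to_nodes = dict((uuid, []) for uuid in wanted)
  node_to_group = {}
  for node in all_nodes.values():
    if node.group in group_to_nodes:
      group_to_nodes[node.group].append(node.name)
      node_to_group[node.name] = node.group
  group_to_instances = dict((uuid, []) for uuid in wanted)
  for inst in all_instances.values():
    group = node_to_group.get(inst.primary_node)
    if group is not None:
      group_to_instances[group].append(inst.name)
  return (group_to_nodes, group_to_instances)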
13907 class LUGroupQuery(NoHooksLU):
13908 """Logical unit for querying node groups.
13913 def CheckArguments(self):
13914 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13915 self.op.output_fields, False)
13917 def ExpandNames(self):
13918 self.gq.ExpandNames(self)
13920 def DeclareLocks(self, level):
13921 self.gq.DeclareLocks(self, level)
13923 def Exec(self, feedback_fn):
13924 return self.gq.OldStyleQuery(self)
13927 class LUGroupSetParams(LogicalUnit):
13928 """Modifies the parameters of a node group.
13931 HPATH = "group-modify"
13932 HTYPE = constants.HTYPE_GROUP
13933 REQ_BGL = False
13935 def CheckArguments(self):
13936 all_changes = [
13937 self.op.ndparams,
13938 self.op.diskparams,
13939 self.op.alloc_policy,
13940 self.op.hv_state,
13941 self.op.disk_state,
13942 self.op.ipolicy,
13943 ]
13945 if all_changes.count(None) == len(all_changes):
13946 raise errors.OpPrereqError("Please pass at least one modification",
13947 errors.ECODE_INVAL)
13949 def ExpandNames(self):
13950 # This raises errors.OpPrereqError on its own:
13951 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13953 self.needed_locks = {
13954 locking.LEVEL_INSTANCE: [],
13955 locking.LEVEL_NODEGROUP: [self.group_uuid],
13956 }
13958 self.share_locks[locking.LEVEL_INSTANCE] = 1
13960 def DeclareLocks(self, level):
13961 if level == locking.LEVEL_INSTANCE:
13962 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13964 # Lock instances optimistically, needs verification once group lock has
13965 # been acquired
13966 self.needed_locks[locking.LEVEL_INSTANCE] = \
13967 self.cfg.GetNodeGroupInstances(self.group_uuid)
13969 @staticmethod
13970 def _UpdateAndVerifyDiskParams(old, new):
13971 """Updates and verifies disk parameters.
13974 new_params = _GetUpdatedParams(old, new)
13975 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13976 return new_params
13978 def CheckPrereq(self):
13979 """Check prerequisites.
13982 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13984 # Check if locked instances are still correct
13985 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13987 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13988 cluster = self.cfg.GetClusterInfo()
13990 if self.group is None:
13991 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13992 (self.op.group_name, self.group_uuid))
13994 if self.op.ndparams:
13995 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13996 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13997 self.new_ndparams = new_ndparams
13999 if self.op.diskparams:
14000 diskparams = self.group.diskparams
14001 uavdp = self._UpdateAndVerifyDiskParams
14002 # For each disktemplate subdict update and verify the values
14003 new_diskparams = dict((dt,
14004 uavdp(diskparams.get(dt, {}),
14005 self.op.diskparams[dt]))
14006 for dt in constants.DISK_TEMPLATES
14007 if dt in self.op.diskparams)
14008 # Now that we have all subdicts of diskparams ready, let's merge the
14009 # actual dict with all updated subdicts
14010 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14011 try:
14012 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14013 except errors.OpPrereqError, err:
14014 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14015 errors.ECODE_INVAL)
14017 if self.op.hv_state:
14018 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14019 self.group.hv_state_static)
14021 if self.op.disk_state:
14022 self.new_disk_state = \
14023 _MergeAndVerifyDiskState(self.op.disk_state,
14024 self.group.disk_state_static)
14026 if self.op.ipolicy:
14027 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14028 self.op.ipolicy,
14029 group_policy=True)
14031 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14032 inst_filter = lambda inst: inst.name in owned_instances
14033 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14034 violations = \
14035 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14036 self.group),
14037 new_ipolicy, instances)
14039 if violations:
14040 self.LogWarning("After the ipolicy change the following instances"
14041 " violate them: %s",
14042 utils.CommaJoin(violations))
14044 def BuildHooksEnv(self):
14045 """Build hooks env.
14049 "GROUP_NAME": self.op.group_name,
14050 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14053 def BuildHooksNodes(self):
14054 """Build hooks nodes.
14057 mn = self.cfg.GetMasterNode()
14058 return ([mn], [mn])
14060 def Exec(self, feedback_fn):
14061 """Modifies the node group.
14066 if self.op.ndparams:
14067 self.group.ndparams = self.new_ndparams
14068 result.append(("ndparams", str(self.group.ndparams)))
14070 if self.op.diskparams:
14071 self.group.diskparams = self.new_diskparams
14072 result.append(("diskparams", str(self.group.diskparams)))
14074 if self.op.alloc_policy:
14075 self.group.alloc_policy = self.op.alloc_policy
14077 if self.op.hv_state:
14078 self.group.hv_state_static = self.new_hv_state
14080 if self.op.disk_state:
14081 self.group.disk_state_static = self.new_disk_state
14083 if self.op.ipolicy:
14084 self.group.ipolicy = self.new_ipolicy
14086 self.cfg.Update(self.group, feedback_fn)
14088 return result
14090 class LUGroupRemove(LogicalUnit):
14091 HPATH = "group-remove"
14092 HTYPE = constants.HTYPE_GROUP
14093 REQ_BGL = False
14095 def ExpandNames(self):
14096 # This raises errors.OpPrereqError on its own:
14097 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14098 self.needed_locks = {
14099 locking.LEVEL_NODEGROUP: [self.group_uuid],
14100 }
14102 def CheckPrereq(self):
14103 """Check prerequisites.
14105 This checks that the given group name exists as a node group, that it is
14106 empty (i.e., contains no nodes), and that it is not the last group of the
14107 cluster.
14110 # Verify that the group is empty.
14111 group_nodes = [node.name
14112 for node in self.cfg.GetAllNodesInfo().values()
14113 if node.group == self.group_uuid]
14115 if group_nodes:
14116 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14117 " nodes: %s" %
14118 (self.op.group_name,
14119 utils.CommaJoin(utils.NiceSort(group_nodes))),
14120 errors.ECODE_STATE)
14122 # Verify the cluster would not be left group-less.
14123 if len(self.cfg.GetNodeGroupList()) == 1:
14124 raise errors.OpPrereqError("Group '%s' is the only group,"
14125 " cannot be removed" %
14126 self.op.group_name,
14127 errors.ECODE_STATE)
14129 def BuildHooksEnv(self):
14130 """Build hooks env.
14134 "GROUP_NAME": self.op.group_name,
14137 def BuildHooksNodes(self):
14138 """Build hooks nodes.
14141 mn = self.cfg.GetMasterNode()
14142 return ([mn], [mn])
14144 def Exec(self, feedback_fn):
14145 """Remove the node group.
14149 self.cfg.RemoveNodeGroup(self.group_uuid)
14150 except errors.ConfigurationError:
14151 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14152 (self.op.group_name, self.group_uuid))
14154 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14157 class LUGroupRename(LogicalUnit):
14158 HPATH = "group-rename"
14159 HTYPE = constants.HTYPE_GROUP
14160 REQ_BGL = False
14162 def ExpandNames(self):
14163 # This raises errors.OpPrereqError on its own:
14164 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14166 self.needed_locks = {
14167 locking.LEVEL_NODEGROUP: [self.group_uuid],
14168 }
14170 def CheckPrereq(self):
14171 """Check prerequisites.
14173 Ensures requested new name is not yet used.
14175 """
14176 try:
14177 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14178 except errors.OpPrereqError:
14179 pass
14180 else:
14181 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14182 " node group (UUID: %s)" %
14183 (self.op.new_name, new_name_uuid),
14184 errors.ECODE_EXISTS)
14186 def BuildHooksEnv(self):
14187 """Build hooks env.
14191 "OLD_NAME": self.op.group_name,
14192 "NEW_NAME": self.op.new_name,
14195 def BuildHooksNodes(self):
14196 """Build hooks nodes.
14199 mn = self.cfg.GetMasterNode()
14201 all_nodes = self.cfg.GetAllNodesInfo()
14202 all_nodes.pop(mn, None)
14204 run_nodes = [mn]
14205 run_nodes.extend(node.name for node in all_nodes.values()
14206 if node.group == self.group_uuid)
14208 return (run_nodes, run_nodes)
14210 def Exec(self, feedback_fn):
14211 """Rename the node group.
14214 group = self.cfg.GetNodeGroup(self.group_uuid)
14216 if group is None:
14217 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14218 (self.op.group_name, self.group_uuid))
14220 group.name = self.op.new_name
14221 self.cfg.Update(group, feedback_fn)
14223 return self.op.new_name
14226 class LUGroupEvacuate(LogicalUnit):
14227 HPATH = "group-evacuate"
14228 HTYPE = constants.HTYPE_GROUP
14229 REQ_BGL = False
14231 def ExpandNames(self):
14232 # This raises errors.OpPrereqError on its own:
14233 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14235 if self.op.target_groups:
14236 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14237 self.op.target_groups)
14238 else:
14239 self.req_target_uuids = []
14241 if self.group_uuid in self.req_target_uuids:
14242 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14243 " as a target group (targets are %s)" %
14245 utils.CommaJoin(self.req_target_uuids)),
14246 errors.ECODE_INVAL)
14248 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14250 self.share_locks = _ShareAll()
14251 self.needed_locks = {
14252 locking.LEVEL_INSTANCE: [],
14253 locking.LEVEL_NODEGROUP: [],
14254 locking.LEVEL_NODE: [],
14255 }
14257 def DeclareLocks(self, level):
14258 if level == locking.LEVEL_INSTANCE:
14259 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14261 # Lock instances optimistically, needs verification once node and group
14262 # locks have been acquired
14263 self.needed_locks[locking.LEVEL_INSTANCE] = \
14264 self.cfg.GetNodeGroupInstances(self.group_uuid)
14266 elif level == locking.LEVEL_NODEGROUP:
14267 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14269 if self.req_target_uuids:
14270 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14272 # Lock all groups used by instances optimistically; this requires going
14273 # via the node before it's locked, requiring verification later on
14274 lock_groups.update(group_uuid
14275 for instance_name in
14276 self.owned_locks(locking.LEVEL_INSTANCE)
14277 for group_uuid in
14278 self.cfg.GetInstanceNodeGroups(instance_name))
14279 else:
14280 # No target groups, need to lock all of them
14281 lock_groups = locking.ALL_SET
14283 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14285 elif level == locking.LEVEL_NODE:
14286 # This will only lock the nodes in the group to be evacuated which
14287 # contain actual instances
14288 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14289 self._LockInstancesNodes()
14291 # Lock all nodes in group to be evacuated and target groups
14292 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14293 assert self.group_uuid in owned_groups
14294 member_nodes = [node_name
14295 for group in owned_groups
14296 for node_name in self.cfg.GetNodeGroup(group).members]
14297 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14299 def CheckPrereq(self):
14300 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14301 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14302 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14304 assert owned_groups.issuperset(self.req_target_uuids)
14305 assert self.group_uuid in owned_groups
14307 # Check if locked instances are still correct
14308 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14310 # Get instance information
14311 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14313 # Check if node groups for locked instances are still correct
14314 _CheckInstancesNodeGroups(self.cfg, self.instances,
14315 owned_groups, owned_nodes, self.group_uuid)
14317 if self.req_target_uuids:
14318 # User requested specific target groups
14319 self.target_uuids = self.req_target_uuids
14320 else:
14321 # All groups except the one to be evacuated are potential targets
14322 self.target_uuids = [group_uuid for group_uuid in owned_groups
14323 if group_uuid != self.group_uuid]
14325 if not self.target_uuids:
14326 raise errors.OpPrereqError("There are no possible target groups",
14327 errors.ECODE_INVAL)
14329 def BuildHooksEnv(self):
14330 """Build hooks env.
14334 "GROUP_NAME": self.op.group_name,
14335 "TARGET_GROUPS": " ".join(self.target_uuids),
14338 def BuildHooksNodes(self):
14339 """Build hooks nodes.
14342 mn = self.cfg.GetMasterNode()
14344 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14346 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14348 return (run_nodes, run_nodes)
14350 def Exec(self, feedback_fn):
14351 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14353 assert self.group_uuid not in self.target_uuids
14355 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14356 instances=instances, target_groups=self.target_uuids)
14358 ial.Run(self.op.iallocator)
14360 if not ial.success:
14361 raise errors.OpPrereqError("Can't compute group evacuation using"
14362 " iallocator '%s': %s" %
14363 (self.op.iallocator, ial.info),
14364 errors.ECODE_NORES)
14366 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14368 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14369 len(jobs), self.op.group_name)
14371 return ResultWithJobs(jobs)
14374 class TagsLU(NoHooksLU): # pylint: disable=W0223
14375 """Generic tags LU.
14377 This is an abstract class which is the parent of all the other tags LUs.
14380 def ExpandNames(self):
14381 self.group_uuid = None
14382 self.needed_locks = {}
14384 if self.op.kind == constants.TAG_NODE:
14385 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14386 lock_level = locking.LEVEL_NODE
14387 lock_name = self.op.name
14388 elif self.op.kind == constants.TAG_INSTANCE:
14389 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14390 lock_level = locking.LEVEL_INSTANCE
14391 lock_name = self.op.name
14392 elif self.op.kind == constants.TAG_NODEGROUP:
14393 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14394 lock_level = locking.LEVEL_NODEGROUP
14395 lock_name = self.group_uuid
14396 else:
14397 lock_level = None
14398 lock_name = None
14400 if lock_level and getattr(self.op, "use_locking", True):
14401 self.needed_locks[lock_level] = lock_name
14403 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14404 # not possible to acquire the BGL based on opcode parameters)
14406 def CheckPrereq(self):
14407 """Check prerequisites.
14410 if self.op.kind == constants.TAG_CLUSTER:
14411 self.target = self.cfg.GetClusterInfo()
14412 elif self.op.kind == constants.TAG_NODE:
14413 self.target = self.cfg.GetNodeInfo(self.op.name)
14414 elif self.op.kind == constants.TAG_INSTANCE:
14415 self.target = self.cfg.GetInstanceInfo(self.op.name)
14416 elif self.op.kind == constants.TAG_NODEGROUP:
14417 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14418 else:
14419 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14420 str(self.op.kind), errors.ECODE_INVAL)
14423 class LUTagsGet(TagsLU):
14424 """Returns the tags of a given object.
14429 def ExpandNames(self):
14430 TagsLU.ExpandNames(self)
14432 # Share locks as this is only a read operation
14433 self.share_locks = _ShareAll()
14435 def Exec(self, feedback_fn):
14436 """Returns the tag list.
14439 return list(self.target.GetTags())
14442 class LUTagsSearch(NoHooksLU):
14443 """Searches the tags for a given pattern.
14448 def ExpandNames(self):
14449 self.needed_locks = {}
14451 def CheckPrereq(self):
14452 """Check prerequisites.
14454 This checks the pattern passed for validity by compiling it.
14456 """
14457 try:
14458 self.re = re.compile(self.op.pattern)
14459 except re.error, err:
14460 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14461 (self.op.pattern, err), errors.ECODE_INVAL)
14463 def Exec(self, feedback_fn):
14464 """Returns the tag list.
14468 tgts = [("/cluster", cfg.GetClusterInfo())]
14469 ilist = cfg.GetAllInstancesInfo().values()
14470 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14471 nlist = cfg.GetAllNodesInfo().values()
14472 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14473 tgts.extend(("/nodegroup/%s" % n.name, n)
14474 for n in cfg.GetAllNodeGroupsInfo().values())
14475 results = []
14476 for path, target in tgts:
14477 for tag in target.GetTags():
14478 if self.re.search(tag):
14479 results.append((path, tag))
14481 return results
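# The pattern semantics above use re.search, so a pattern may match anywhere
# inside a tag. A stand-alone sketch over a plain path->tags mapping:
def _ExampleTagSearch(pattern, tags_by_path):
  """Sketch: returns (path, tag) pairs whose tag matches the pattern.

  """
  rx = re.compile(pattern)
  return [(path, tag)
          for (path, tags) in tags_by_path.items()
          for tag in tags
          if rx.search(tag)]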
14483 class LUTagsSet(TagsLU):
14484 """Sets a tag on a given object.
14489 def CheckPrereq(self):
14490 """Check prerequisites.
14492 This checks the type and length of the tag name and value.
14495 TagsLU.CheckPrereq(self)
14496 for tag in self.op.tags:
14497 objects.TaggableObject.ValidateTag(tag)
14499 def Exec(self, feedback_fn):
14500 """Sets the tag.
14502 """
14503 try:
14504 for tag in self.op.tags:
14505 self.target.AddTag(tag)
14506 except errors.TagError, err:
14507 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14508 self.cfg.Update(self.target, feedback_fn)
14511 class LUTagsDel(TagsLU):
14512 """Delete a list of tags from a given object.
14517 def CheckPrereq(self):
14518 """Check prerequisites.
14520 This checks that we have the given tag.
14523 TagsLU.CheckPrereq(self)
14524 for tag in self.op.tags:
14525 objects.TaggableObject.ValidateTag(tag)
14526 del_tags = frozenset(self.op.tags)
14527 cur_tags = self.target.GetTags()
14529 diff_tags = del_tags - cur_tags
14530 if diff_tags:
14531 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14532 raise errors.OpPrereqError("Tag(s) %s not found" %
14533 (utils.CommaJoin(diff_names), ),
14534 errors.ECODE_NOENT)
14536 def Exec(self, feedback_fn):
14537 """Remove the tag from the object.
14540 for tag in self.op.tags:
14541 self.target.RemoveTag(tag)
14542 self.cfg.Update(self.target, feedback_fn)
14545 class LUTestDelay(NoHooksLU):
14546 """Sleep for a specified amount of time.
14548 This LU sleeps on the master and/or nodes for a specified amount of
14549 time.
14554 def ExpandNames(self):
14555 """Expand names and set required locks.
14557 This expands the node list, if any.
14560 self.needed_locks = {}
14561 if self.op.on_nodes:
14562 # _GetWantedNodes can be used here, but is not always appropriate to use
14563 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14564 # more information.
14565 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14566 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14568 def _TestDelay(self):
14569 """Do the actual sleep.
14572 if self.op.on_master:
14573 if not utils.TestDelay(self.op.duration):
14574 raise errors.OpExecError("Error during master delay test")
14575 if self.op.on_nodes:
14576 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14577 for node, node_result in result.items():
14578 node_result.Raise("Failure during rpc call to node %s" % node)
14580 def Exec(self, feedback_fn):
14581 """Execute the test delay opcode, with the wanted repetitions.
14584 if self.op.repeat == 0:
14585 self._TestDelay()
14586 else:
14587 top_value = self.op.repeat - 1
14588 for i in range(self.op.repeat):
14589 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14590 self._TestDelay()
14593 class LUTestJqueue(NoHooksLU):
14594 """Utility LU to test some aspects of the job queue.
14599 # Must be lower than default timeout for WaitForJobChange to see whether it
14600 # notices changed jobs
14601 _CLIENT_CONNECT_TIMEOUT = 20.0
14602 _CLIENT_CONFIRM_TIMEOUT = 60.0
14604 @classmethod
14605 def _NotifyUsingSocket(cls, cb, errcls):
14606 """Opens a Unix socket and waits for another program to connect.
14608 @type cb: callable
14609 @param cb: Callback to send socket name to client
14610 @type errcls: class
14611 @param errcls: Exception class to use for errors
14614 # Using a temporary directory as there's no easy way to create temporary
14615 # sockets without writing a custom loop around tempfile.mktemp and
14616 # socket.bind
14617 tmpdir = tempfile.mkdtemp()
14618 try:
14619 tmpsock = utils.PathJoin(tmpdir, "sock")
14621 logging.debug("Creating temporary socket at %s", tmpsock)
14622 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14623 try:
14624 sock.bind(tmpsock)
14625 sock.listen(1)
14627 # Send details to client
14628 cb(tmpsock)
14630 # Wait for client to connect before continuing
14631 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14633 (conn, _) = sock.accept()
14634 except socket.error, err:
14635 raise errcls("Client didn't connect in time (%s)" % err)
14639 # Remove as soon as client is connected
14640 shutil.rmtree(tmpdir)
14642 # Wait for client to close
14645 # pylint: disable=E1101
14646 # Instance of '_socketobject' has no ... member
14647 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14649 except socket.error, err:
14650 raise errcls("Client failed to confirm notification (%s)" % err)

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = self.spindle_use = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)
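
  # Hedged example (made-up values): constructing a relocation request. The
  # keyword arguments must match the keydata for the mode in _MODE_DATA
  # below exactly, otherwise __init__ raises ProgrammerError:
  #
  #   ial = IAllocator(cfg, rpc_runner, constants.IALLOCATOR_MODE_RELOC,
  #                    name="inst1.example.com",
  #                    relocate_from=["node1.example.com"])
  #   ial.Run("hail")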

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      "ipolicy": cluster_info.ipolicy,
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.primary_hypervisor

    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
                                        [hypervisor_name])
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    cluster = cfg.GetClusterInfo()
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(cfg, node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      "ndparams": cfg.GetNdParams(ninfo),
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
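
  # Worked example (hypothetical numbers) of the free-memory correction
  # above: an instance with BE_MAXMEM=4096 MB that currently uses only
  # 1024 MB may balloon back up at any time, so the allocator must treat
  # the difference as unavailable:
  #
  #   i_mem_diff = 4096 - 1024              # 3072 MB
  #   remote_info["memory_free"] -= max(0, 3072)
  #
  # The max(0, ...) guard prevents an instance using more than its
  # BE_MAXMEM from artificially *increasing* the reported free memory.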

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "spindle_use": beinfo[constants.BE_SPINDLE_USE],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "spindle_use": self.spindle_use,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group-change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
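
  # For reference, a hedged sketch (example values only) of the "request"
  # section this produces for IALLOCATOR_MODE_ALLOC; the keys mirror
  # _AddNewInstance and the keydata in _MODE_DATA below, and the
  # disk_space_total value assumes one 10240 MB disk plus the DRBD meta
  # device:
  #
  #   "request": {
  #     "type": "allocate",
  #     "name": "inst1.example.com",
  #     "memory": 4096,
  #     "vcpus": 2,
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_space_total": 10368,
  #     "disk_template": "drbd",
  #     "required_nodes": 2,
  #     ...
  #   }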

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("spindle_use", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
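
  # The script's reply, validated by _ValidateResult below, is a JSON
  # document of roughly this shape (example values):
  #
  #   {
  #     "success": true,
  #     "info": "allocation successful",
  #     "result": ["node2.example.com", "node3.example.com"]
  #   }
  #
  # "result" is mode-dependent: a node list for allocate/relocate requests,
  # and the (moved, failed, jobs) triple checked by _NEVAC_RESULT for
  # node-evacuate and change-group requests.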

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
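

# A self-contained illustration (example data only, not used by Ganeti) of
# _NodesToGroups' fallback behaviour: unknown nodes are skipped, and a group
# UUID that cannot be resolved is returned verbatim.
def _ExampleNodesToGroups():
  node2group = {
    "node1.example.com": "uuid-default",
    "node2.example.com": "uuid-missing",
    }
  groups = {
    "uuid-default": {"name": "default"},
    }
  # Evaluates to ["default", "uuid-missing"]; "unknown.example.com" is
  # silently ignored
  return IAllocator._NodesToGroups(node2group, groups,
                                   ["node1.example.com", "node2.example.com",
                                    "unknown.example.com"])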


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       spindle_use=self.op.spindle_use)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
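

# Illustrative use only (not part of the original module): resolving a query
# backend the same way the query LUs do; an unknown resource name results in
# OpPrereqError with ECODE_INVAL.
def _ExampleGetNodeQueryClass():
  # Returns the _NodeQuery class registered in _QUERY_IMPL above
  return _GetQueryImplementation(constants.QR_NODE)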